replace-bot/parser/utils.py

32 lines
811 B
Python
Raw Normal View History

2022-10-07 17:50:42 +03:00
from bs4 import BeautifulSoup
2022-02-16 18:13:44 +03:00
2022-10-07 17:50:42 +03:00
def _strip_nbsp(text):
    """Return *text* with every non-breaking space (U+00A0) removed."""
    return text.replace("\xa0", "")


def table_parser(soup: BeautifulSoup, output):
    """Fill *output* with the date and the per-row entries found in *soup*.

    Args:
        soup: Parsed page. It must contain a ``<main>`` element holding at
            least two ``<span style="color:black">`` elements; the second
            one is used as the date.
        output: Mutable mapping that is updated in place. ``output["data"]``
            must already exist and support item assignment.

    Returns:
        The same *output* object, with ``output["date"]`` set and one
        ``output["data"][group] = text`` entry per ``<tr>`` in the document.

    Raises:
        AttributeError: If ``<main>`` is missing, or a row lacks the expected
            ``<td valign="top">`` / black ``<span>``.
        IndexError: If ``<main>`` holds fewer than two black spans.
        KeyError: If *output* has no ``"data"`` entry.
    """
    # Date: the second black span inside <main>.
    black_spans = soup.find("main").find_all("span", style="color:black")
    output["date"] = _strip_nbsp(black_spans[1].text)

    # Rows: `find_all` is the current name; `findAll` is the legacy
    # Beautiful Soup 3 alias.
    for row in soup.find_all("tr"):
        # Value: first black span inside the row's first top-aligned cell.
        text = _strip_nbsp(
            row.find("td", valign="top").find("span", style="color:black").text
        )
        # Key: first black span anywhere in the row, with ordinary spaces
        # removed as well so the key contains no whitespace padding.
        group = _strip_nbsp(
            row.find("span", style="color:black").text.replace(" ", "")
        )
        output["data"][group] = text
    return output
2022-10-07 17:50:42 +03:00
def image_parser(soup: BeautifulSoup):
    """Return the ``src`` of the first ``.jpg`` image inside ``<main>``.

    Args:
        soup: Parsed page containing a ``<main>`` element.

    Returns:
        The value of the ``src`` attribute of the first ``<img>`` under
        ``<main>`` whose ``src`` ends with ``.jpg``.

    Raises:
        AttributeError: If the page has no ``<main>`` element.
        TypeError: If ``<main>`` contains no matching image.
    """
    main = soup.find("main")
    # 'img[src$=".jpg"]' is a CSS selector, which only select()/select_one()
    # interpret. find() would treat the whole string as a literal tag name,
    # never match, and always return None.
    image = main.select_one('img[src$=".jpg"]')
    return image["src"]