replace-bot/parser/utils.py

51 lines
1.3 KiB
Python
Raw Normal View History

2022-02-16 18:13:44 +03:00
def table_parser(soup, output):
    """Fill ``output`` with the date and per-group replacements from a table page.

    ``soup`` is presumably a BeautifulSoup document (it is only used through
    ``find`` / ``findAll`` and the ``.text`` of the returned nodes).
    ``output`` must already contain a ``"data"`` mapping; it is mutated in
    place and also returned.

    Any lookup that yields ``None`` (missing ``<main>``, a row without the
    expected cell or span) propagates as ``AttributeError``; fewer than two
    black spans inside ``<main>`` propagates as ``IndexError``.
    """
    nbsp = u'\xa0'
    black = "color:black"

    # Date parser: the second black span inside <main> carries the date.
    black_spans = soup.find("main").findAll('span', style=black)
    output["date"] = black_spans[1].text.replace(nbsp, u'')

    # Replaces parser: every <tr> contributes one group -> replacement entry.
    for row in soup.findAll('tr'):
        # Replacement text comes from the black span inside the
        # top-aligned cell of the row.
        top_cell = row.find("td", valign="top")
        replacement = top_cell.find("span", style=black).text.replace(nbsp, u'')

        # The group name comes from the black span found by searching the
        # row itself; regular spaces and non-breaking spaces are removed.
        group_span = row.find("span", style=black)
        group = group_span.text.replace(" ", "").replace(nbsp, u'')

        output["data"][group] = replacement
    return output
2022-02-22 15:29:44 +03:00
def text_parser(soup, output):
    """Fill ``output`` with the date and per-group replacements from plain text.

    ``soup`` is presumably a BeautifulSoup document: ``soup.find("main")`` is
    iterated and the ``.text`` of every child is normalised (non-breaking
    spaces and carriage returns removed, leading spaces stripped, lowercased,
    known banner phrases removed). Children that are empty after
    normalisation are ignored.

    The combined text is split into lines: the second line becomes
    ``output["date"]`` and every following non-empty line is split at its
    first space into ``group`` and replacement text, stored under
    ``output["data"][group]``. Lines without a space cannot be split into a
    group and a replacement and are skipped instead of aborting the parse.

    ``output`` must already contain a ``"data"`` mapping; it is mutated in
    place and also returned.

    Raises:
        IndexError: if the normalised text has fewer than two lines.
    """
    main = soup.find("main")

    pieces = []
    for child in main:
        cleaned = (
            child.text
            .replace(u"\xa0", u"")
            .lstrip(" ").lower()
            .replace("\r", "")
            .replace("увага! навчання дистанційно!!!", "")
            .replace("заміни до розкладу", "")
        )
        # Skip children that contain nothing but line breaks.
        if cleaned.replace("\n", "") == "":
            continue
        pieces.append(cleaned)

    # Join once instead of growing a string inside the loop.
    lines = "".join(pieces).split("\n")

    output["date"] = lines[1]

    for line in lines[2:]:
        if line == "":
            continue
        group, separator, replaces = line.partition(" ")
        if not separator:
            # No space: there is no replacement text to attach to a group.
            continue
        output["data"][group] = replaces
    return output