Reworked the parser
This commit is contained in:
parent 617b503a3b
commit ae25f2e2a5
bot.py | 2
@@ -48,7 +48,7 @@ def main() -> None:
     #loop = asyncio.get_event_loop()
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)
-    #loop.create_task(scheduler())
+    loop.create_task(scheduler())
 
     if config.bot("use_webhook").lower() in ['t', 'true', '1', 'yes', 'y']:
         executor.start_webhook(
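Note on the hunk above: the fix is simply uncommenting `loop.create_task(scheduler())`, so the background scheduler coroutine is actually scheduled on the freshly created loop before the webhook starts; until `create_task` is called, `scheduler()` never runs. A minimal self-contained sketch of the same pattern, with plain-asyncio stand-ins for the bot's real `scheduler` and `executor.start_webhook`:

import asyncio

async def scheduler() -> None:
    # Stand-in for the bot's periodic job: wake on an interval and do work.
    while True:
        await asyncio.sleep(1)
        print("tick: checking the schedule page...")

async def serve() -> None:
    # Stand-in for executor.start_webhook(), which also runs on this loop.
    await asyncio.sleep(3)

def main() -> None:
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    task = loop.create_task(scheduler())  # background job starts with the loop
    loop.run_until_complete(serve())      # the "webhook" keeps the loop alive
    task.cancel()                         # tidy shutdown for this demo

if __name__ == "__main__":
    main()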
@@ -38,16 +38,12 @@ def docs_parse():
     page = requests.get(config.bot("link"), headers=headers)
     page.encoding = 'utf-8'
 
-    soup = BeautifulSoup(page.text, "html.parser")
+    soup = BeautifulSoup(page.text, "lxml")
 
     # Ideally this should be rewritten...
     try: output = table_parser(soup, output); #print(output)
     except Exception: pass
-    try: output = one_parser(soup, output); #print(output)
-    except Exception: pass
-    try: output = parser_two(soup, output); #print(output)
-    #except Exception as e: pass
-    #try: output = parser3(soup, output); print(output)
+    try: output = test_parser(soup, output)
+    except Exception as e: raise(e)
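Two changes here: the BeautifulSoup backend moves from the stdlib `html.parser` to the faster, more lenient `lxml` (added to requirements below), and the chain of fallback parsers is trimmed so that only `table_parser` is tried silently before the new text-based parser, whose failure is re-raised. The pattern behind that stack of one-line `try`/`except`s is a fallback chain; a hedged sketch of the same idea as a loop (function and parameter names here are illustrative, not the repo's API):

from typing import Callable

def parse_with_fallbacks(soup, output: dict, parsers: list[Callable]) -> dict:
    # Try each layout-specific parser in order; the first one that does not
    # raise wins. The last parser's error is allowed to propagate, so a page
    # that matches no known layout is still reported loudly.
    for i, parser in enumerate(parsers):
        try:
            return parser(soup, output)
        except Exception:
            if i == len(parsers) - 1:
                raise
    return output

# Equivalent to the hunk above, with this commit's surviving parsers:
# output = parse_with_fallbacks(soup, output, [table_parser, test_parser])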
@@ -21,48 +21,30 @@ def table_parser(soup, output):
 
     return output
 
-def one_parser(soup, output):
-    raw_data = soup.find("main").findAll("p")
-    date = (
-        raw_data[3].text.lower()
-        .replace(u"\xa0", u"").replace("на", "").replace("\r", "")
-        .replace("ЗАМІНИ ДО РОЗКЛАДУ".lower(), "").split("\n")
-    )
-    output["date"] = date[0].lstrip(" ")
-
-    for p in raw_data[4].text.replace(u"\xa0", u"").split("\n"):
+def text_parser(soup, output):
+    main = soup.find("main")
+
+    text: str = ''
+    for j in main:
+        r_text = (
+            j.text
+            .replace(u"\xa0", u"")
+            .lstrip(" ").lower()
+            .replace("\r", "")
+            .replace("увага! навчання дистанційно!!!", "")
+            .replace("заміни до розкладу", "")
+        )
+        if r_text.replace("\n", "") == "": continue
+        text += r_text
+
+    data = text.split("\n")
+
+    output["date"] = data[1]
+
+    for p in data[2:]:
+        if p == "": continue
+        data_rep = (p.lstrip(" ").split(" ", 1))
+        group = data_rep[0]
+        text = data_rep[1].replace("\r", "").lstrip(" ")
         output["data"][group] = text
     return output
 
-def parser_two(soup, output):
-    raw_data = soup.find("main").findAll("p")[2]
-    data = raw_data.text.split("\n")
-    output["date"] = data[1].replace("\r", "")
-
-    for p in data[3:]:
-        r_data = p.split(maxsplit=1)
-        try:
-            group = r_data[0].replace(u"\xa0", u"").replace("\r", "")
-            text = r_data[1].replace(u"\xa0", u"").replace("\r", "")
-        except IndexError: break
-        output["data"][group] = text
-    return output
-
-def parser3(soup, output):
-    raw_data = soup.find("main").findAll("p")
-
-    output["date"] = (
-        raw_data[2].text
-        .replace("\r", "")
-        .replace("ЗАМІНИ НА", "").lstrip(" ").rstrip(" ").lower()
-    )
-    for p in raw_data[5:]:
-        r_data = p.text.split("-", maxsplit=1)
-        group = r_data[0]
-        text = r_data[1]
-        output["data"][group] = text
-        group, replaces = p.split(" ", maxsplit=1)
-        output["data"][group] = replaces
-
-    return output
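The core of this commit: the positional parsers `one_parser`, `parser_two` and `parser3`, which each assumed a fixed number of `<p>` tags on the page, give way to a single `text_parser` that flattens everything under `<main>`, strips the boilerplate headings, reads the first non-empty line as the date, and splits each later line into a group name and its replacement text. A quick check of that logic against made-up input (the HTML is invented test data, and the function is condensed from the diff above):

from bs4 import BeautifulSoup

def text_parser(soup, output):
    # Condensed from the diff above: flatten <main>, drop boilerplate, then
    # treat line 1 as the date and every later line as "GROUP replacements".
    text = ''
    for j in soup.find("main"):
        r_text = (
            j.text
            .replace(u"\xa0", u"")
            .lstrip(" ").lower()
            .replace("\r", "")
            .replace("увага! навчання дистанційно!!!", "")
            .replace("заміни до розкладу", "")
        )
        if r_text.replace("\n", "") == "": continue
        text += r_text

    data = text.split("\n")
    output["date"] = data[1]
    for p in data[2:]:
        if p == "": continue
        group, rest = p.lstrip(" ").split(" ", 1)
        output["data"][group] = rest.replace("\r", "").lstrip(" ")
    return output

html = "<main><p>ЗАМІНИ ДО РОЗКЛАДУ\nна 01.09\nІП-21 перша пара\nКН-11 друга пара</p></main>"
soup = BeautifulSoup(html, "lxml")
result = text_parser(soup, {"date": None, "data": {}})
print(result["date"])  # -> "на 01.09" (everything is lowercased)
print(result["data"])  # -> {"іп-21": "перша пара", "кн-11": "друга пара"}

Because the whole page is lowercased before splitting, group keys come out in lower case, and the date still carries the "на" prefix that `one_parser` used to strip.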
@@ -2,6 +2,7 @@
 #google-auth-httplib2
 #google-auth-oauthlib
 bs4
+lxml
 peewee
 aiogram
 cryptography
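`lxml` joins the requirements because the `BeautifulSoup(page.text, "lxml")` call above needs the package installed; unlike `html.parser` it is not part of the standard library. A one-liner to verify the backend is available:

from bs4 import BeautifulSoup

# bs4 raises bs4.FeatureNotFound here if the lxml package is missing.
print(BeautifulSoup("<p>ok</p>", "lxml").p.text)  # -> ok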