diff --git a/bot.py b/bot.py
index 3ea348d..0d74980 100755
--- a/bot.py
+++ b/bot.py
@@ -48,7 +48,7 @@ def main() -> None:
     #loop = asyncio.get_event_loop()
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)
-    #loop.create_task(scheduler())
+    loop.create_task(scheduler())
 
     if config.bot("use_webhook").lower() in ['t', 'true', '1', 'yes', 'y']:
         executor.start_webhook(
diff --git a/parser/parser.py b/parser/parser.py
index 28d066e..937bb76 100644
--- a/parser/parser.py
+++ b/parser/parser.py
@@ -38,18 +38,14 @@ def docs_parse():
     page = requests.get(config.bot("link"), headers=headers)
     page.encoding = 'utf-8'
-    soup = BeautifulSoup(page.text, "html.parser")
+    soup = BeautifulSoup(page.text, "lxml")
 
     # Это в идеале нужно переписать...
     try: output = table_parser(soup, output); #print(output)
     except Exception: pass
 
-    try: output = one_parser(soup, output); #print(output)
-    except Exception: pass
-    try: output = parser_two(soup, output); #print(output)
-    #except Exception as e: pass
-    #try: output = parser3(soup, output); print(output)
+    try: output = text_parser(soup, output)
     except Exception as e: raise(e)
-    
+
     with open(config.data_file, 'w') as f:
         json.dump(output, f, ensure_ascii=False)
 
diff --git a/parser/utils.py b/parser/utils.py
index 933d6ca..01b7cf8 100644
--- a/parser/utils.py
+++ b/parser/utils.py
@@ -21,48 +21,30 @@ def table_parser(soup, output):
     return output
 
-def one_parser(soup, output):
-    raw_data = soup.find("main").findAll("p")
-    date = (
-        raw_data[3].text.lower()
-        .replace(u"\xa0", u"").replace("на", "").replace("\r", "")
-        .replace("ЗАМІНИ ДО РОЗКЛАДУ".lower(), "").split("\n")
-    )
-    output["date"] = date[0].lstrip(" ")
-    for p in raw_data[4].text.replace(u"\xa0", u"").split("\n"):
+def text_parser(soup, output):
+    main = soup.find("main")
+
+    text: str = ''
+    for j in main:
+        r_text = (
+            j.text
+            .replace(u"\xa0", u"")
+            .lstrip(" ").lower()
+            .replace("увага! \nнавчання дистанційно!!!", "")
+            .replace("\r", "")
+            .replace("заміни до розкладу", "")
+        )
+        if r_text.replace("\n", "") == "": continue
+        text += r_text
+
+    data = text.split("\n")
+
+    output["date"] = data[1]
+
+    for p in data[2:]:
         if p == "": continue
-        data_rep = (p.lstrip(" ").split(" ", 1))
-        group = data_rep[0]
-        text = data_rep[1].replace("\r", "").lstrip(" ")
-        output["data"][group] = text
-    return output
-
-def parser_two(soup, output):
-    raw_data = soup.find("main").findAll("p")[2]
-    data = raw_data.text.split("\n")
-    output["date"] = data[1].replace("\r", "")
-
-    for p in data[3:]:
-        r_data = p.split(maxsplit=1)
-        try:
-            group = r_data[0].replace(u"\xa0", u"").replace("\r", "")
-            text = r_data[1].replace(u"\xa0", u"").replace("\r", "")
-        except IndexError: break
-        output["data"][group] = text
-    return output
-
-def parser3(soup, output):
-    raw_data = soup.find("main").findAll("p")
-
-    output["date"] = (
-        raw_data[2].text
-        .replace("\r", "")
-        .replace("ЗАМІНИ НА", "").lstrip(" ").rstrip(" ").lower()
-    )
-    for p in raw_data[5:]:
-        r_data = p.text.split("-", maxsplit=1)
-        group = r_data[0]
-        text = r_data[1]
-        output["data"][group] = text
+        group, replaces = p.split(" ", maxsplit=1)
+        output["data"][group] = replaces
+
     return output
 
diff --git a/requirements.txt b/requirements.txt
index 6ab5307..14a7e09 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,6 +2,7 @@
 #google-auth-httplib2
 #google-auth-oauthlib
 bs4
+lxml
 peewee
 aiogram
 cryptography