Initial commit

Jemacivan committed 2022-02-16 17:13:44 +02:00 (commit 8060b933a5)
42 changed files with 1281 additions and 0 deletions

parser/__init__.py (new file, +2 lines)

@@ -0,0 +1,2 @@
from .parser import get_about_replacements, docs_parse
__all__ = ['get_about_replacements', 'docs_parse']

parser/parser.py (new file, +63 lines)

@@ -0,0 +1,63 @@
import requests
import json
import datetime
from datetime import datetime as dt
from bs4 import BeautifulSoup
try:
    from load import config
except ImportError:
    config = None
from .utils import table_parser, one_parser, parser_two, parser3
headers = {
    'user-agent': (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/62.0.3202.9 Safari/537.36"
    )
}
def date_parser_helper(days: int, parse: str = "%d.%m.20%y"):
    # Returns the date `days` days from now, formatted as DD.MM.20YY.
    return dt.strftime(
        dt.now() + datetime.timedelta(days=days),
        parse
    )
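# Example (hypothetical date): if today were 2022-02-16, then
# date_parser_helper(1) would return "17.02.2022".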
def docs_parse():
    output = {
        "data": {},
        "another_teacher": None
    }
    page = requests.get(config.bot("link"), headers=headers)
    page.encoding = 'utf-8'
    soup = BeautifulSoup(page.text, "html.parser")
    # Ideally this should be rewritten: try each layout parser in turn
    # until one matches the current page markup.
    try:
        output = table_parser(soup, output)
    except Exception:
        pass
    try:
        output = one_parser(soup, output)
    except Exception:
        pass
    try:
        output = parser_two(soup, output)
    except Exception:
        raise
    # try: output = parser3(soup, output)
    # except Exception: raise
    with open(config.data_file, 'w') as f:
        json.dump(output, f, ensure_ascii=False)
def get_about_replacements() -> dict:
    with open(config.data_file, 'r') as f:
        return json.load(f)
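For context, a minimal usage sketch of the two exported functions (hypothetical, not part of the diff; it assumes the `load` module's `config` provides `bot("link")` and `data_file`, as the imports above imply):

from parser import docs_parse, get_about_replacements

docs_parse()  # scrape the schedule page and write config.data_file
replacements = get_about_replacements()
print(replacements["date"])  # the date the replacements apply to
for group, text in replacements["data"].items():
    print(group, "-", text)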

parser/utils.py (new file, +68 lines)

@@ -0,0 +1,68 @@
def table_parser(soup, output):
    # Layout 1: replacements presented as a table, one <tr> per group.
    # Date parser
    date = soup.find("main").findAll('span', style="color:black")[1]
    output["date"] = date.text.replace('\xa0', '')
    # Replacements parser
    replaces = soup.findAll('tr')
    for data in replaces:
        text = (
            data.find("td", valign="top")
            .find("span", style="color:black")
            .text.replace('\xa0', '')
        )
        group = (
            data.find("span", style="color:black")
            .text.replace(" ", "").replace('\xa0', '')
        )
        output["data"][group] = text
    return output
def one_parser(soup, output):
    # Layout 2: date and replacements inside bold 16px spans in paragraphs.
    raw_data = soup.find("main").findAll("p")
    date = (
        raw_data[3].find("span", style="font-size:16px;").b.text.lower()
        .replace("\xa0", "").replace("на", "").replace("\r", "")
        .replace("ЗАМІНИ ДО РОЗКЛАДУ".lower(), "").split("\n")
    )
    output["date"] = date[1].lstrip(" ")
    lines = (
        raw_data[4].find("span", style="font-size:16px;").b.text
        .replace("\xa0", "").split("\n")
    )
    for p in lines:
        data_rep = p.lstrip(" ").split(" ", 1)
        group = data_rep[0]
        text = data_rep[1].replace("\r", "").lstrip(" ")
        output["data"][group] = text
    return output
def parser_two(soup, output):
    # Layout 3: plain paragraph text, one "group text" pair per line.
    raw_data = soup.find("main").findAll("p")[2]
    data = raw_data.text.split("\n")
    output["date"] = data[1].replace("\r", "")
    for p in data[3:]:
        r_data = p.split(maxsplit=1)
        try:
            group = r_data[0].replace("\xa0", "").replace("\r", "")
            text = r_data[1].replace("\xa0", "").replace("\r", "")
        except IndexError:
            break
        output["data"][group] = text
    return output
def parser3(soup, output):
    # Layout 4: dash-separated "group - text" pairs (currently disabled
    # in docs_parse).
    raw_data = soup.find("main").findAll("p")
    output["date"] = (
        raw_data[2].text
        .replace("\r", "")
        .replace("ЗАМІНИ НА", "").strip(" ").lower()
    )
    for p in raw_data[5:]:
        r_data = p.text.split("-", maxsplit=1)
        group = r_data[0]
        text = r_data[1]
        output["data"][group] = text
    return output
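A hypothetical smoke test for parser_two, with invented HTML shaped the way its selectors expect (the replacement text lives in the third <p> of <main>: date on the second line, one "group text" pair per line from the fourth line on):

from bs4 import BeautifulSoup
from parser.utils import parser_two

html = """<main><p></p><p></p><p>ЗАМІНИ
01.03.2022

111 History instead of Math
222 Cancelled
</p></main>"""
soup = BeautifulSoup(html, "html.parser")
out = parser_two(soup, {"data": {}, "another_teacher": None})
# out["date"] -> "01.03.2022"
# out["data"] -> {"111": "History instead of Math", "222": "Cancelled"}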