Update parser and add new feature
This commit is contained in:
@@ -1,14 +1,18 @@
|
||||
import requests
|
||||
import base64
|
||||
import json
|
||||
import datetime
|
||||
from datetime import datetime as dt
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
try:
|
||||
from load import config
|
||||
except: config = None
|
||||
from .utils import *
|
||||
except ImportError: config = None
|
||||
try:
|
||||
from .utils import *
|
||||
except ImportError:
|
||||
from utils import *
|
||||
|
||||
|
||||
headers = {
|
||||
@@ -41,10 +45,11 @@ def docs_parse():
|
||||
soup = BeautifulSoup(page.text, "lxml")
|
||||
|
||||
# Ideally, this should be rewritten...
|
||||
try: output = table_parser(soup, output); #print(output)
|
||||
except Exception: pass
|
||||
try: output = test_parser(soup, output)
|
||||
except Exception as e: raise(e)
|
||||
url = image_parser(soup)
|
||||
with requests.get(url=url, allow_redirects=True, stream=True) as r:
|
||||
output['image'] = True
|
||||
output['date'] = 'невозможно получить!'
|
||||
output['data']['all'] = base64.b64encode(r.content).decode('utf-8')
|
||||
|
||||
|
||||
with open(config.data_file, 'w') as f:
|
||||
@@ -57,3 +62,4 @@ def get_about_replacements() -> dict:
|
||||
data = json.loads(f.read())
|
||||
f.close()
|
||||
return data
|
||||
docs_parse()
|
@@ -1,5 +1,6 @@
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
def table_parser(soup, output):
|
||||
def table_parser(soup: BeautifulSoup, output):
|
||||
#Date parser
|
||||
date = (soup.find("main").findAll('span', style="color:black"))[1]
|
||||
output["date"] = date.text.replace(u'\xa0', u'')
|
||||
@@ -22,29 +23,9 @@ def table_parser(soup, output):
|
||||
return output
|
||||
|
||||
|
||||
def text_parser(soup, output: dict) -> dict:
    """Fill ``output`` with the date and per-group replacements from ``<main>``.

    The text of every child of the ``<main>`` element is normalised
    (non-breaking spaces and carriage returns removed, leading spaces
    stripped, lower-cased, two known heading phrases dropped) and the pieces
    are concatenated.  The result is split on newlines: the second line is
    stored as ``output["date"]``, and every later non-empty line is split at
    its first space into a group name and that group's replacement text.

    Args:
        soup: Parsed page; must expose ``find("main")`` returning an iterable
            of nodes that each have a ``text`` attribute.
        output: Result mapping, updated in place.  ``output["data"]`` must
            already be a mapping.

    Returns:
        The same ``output`` object, so callers can write
        ``output = text_parser(soup, output)``.

    Raises:
        IndexError: If the collected text has fewer than two lines.
        ValueError: If a replacement line contains no space.
    """
    main = soup.find("main")

    fragments = []
    for node in main:
        cleaned = (
            node.text
            .replace("\xa0", "")
            .lstrip(" ").lower()
            .replace("\r", "")
            .replace("увага! навчання дистанційно!!!", "")
            .replace("заміни до розкладу", "")
        )
        # Skip nodes that contribute nothing but line breaks.
        if cleaned.replace("\n", "") == "":
            continue
        fragments.append(cleaned)

    # Join once instead of growing a string inside the loop.
    lines = "".join(fragments).split("\n")

    output["date"] = lines[1]

    for line in lines[2:]:
        if line == "":
            continue
        group, replaces = line.split(" ", maxsplit=1)
        output["data"][group] = replaces

    return output
|
||||
def image_parser(soup: BeautifulSoup):
    """Return the ``src`` of the ``.jpg`` image inside the centered paragraph.

    Looks up the first ``<p>`` whose ``style`` attribute is exactly
    ``"text-align:center; margin:0cm 0cm 8pt"``, then the first ``<img>``
    under it whose ``src`` ends with ``.jpg``, and returns that ``src`` value.
    """
    paragraph = soup.find("p", style="text-align:center; margin:0cm 0cm 8pt")
    return paragraph.select_one('img[src$=".jpg"]')['src']
|
||||
|
Reference in New Issue
Block a user