first commit
This commit is contained in:
2
parser/__init__.py
Normal file
2
parser/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
from .parser import get_about_replacements, docs_parse
|
||||
__all__ = ['get_about_replacements', 'docs_parse']
|
67
parser/parser.py
Normal file
67
parser/parser.py
Normal file
@@ -0,0 +1,67 @@
|
||||
import base64
|
||||
import json
|
||||
import datetime
|
||||
from datetime import datetime as dt
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
try:
|
||||
from load import config
|
||||
except ImportError: config = None
|
||||
try:
|
||||
from .utils import *
|
||||
except ImportError:
|
||||
from utils import *
|
||||
|
||||
|
||||
# Default HTTP request headers: a Chrome-on-Windows User-Agent string,
# assembled from its three space-separated product tokens.
_USER_AGENT_PARTS = (
    "Mozilla/5.0 (Windows NT 10.0; WOW64)",
    "AppleWebKit/537.36 (KHTML, like Gecko)",
    "Chrome/62.0.3202.9 Safari/537.36",
)

headers = {'user-agent': " ".join(_USER_AGENT_PARTS)}
|
||||
|
||||
|
||||
def date_parser_helper(days:int, parse:str="%d.%m.20%y"):
    """Format the moment that lies ``days`` days away from now.

    Args:
        days: Number of days added to the current local time
            (``dt.now()``) before formatting.
        parse: ``strftime`` format string applied to the shifted moment.
            The default renders ``DD.MM.20YY``.

    Returns:
        The shifted date/time rendered with ``parse``.
    """
    shifted = dt.now() + datetime.timedelta(days=days)
    return shifted.strftime(parse)
|
||||
|
||||
|
||||
# Seconds to wait for the remote server before giving up on a request.
_REQUEST_TIMEOUT = 30


def docs_parse():
    """Download the image found on the configured page and cache it as JSON.

    Fetches the page at ``config.parser.link``, asks ``image_parser`` for the
    ``src`` of an image inside it, downloads that image and writes a JSON
    document to ``config.data_file`` with these keys:

    * ``data``: ``{"all": <base64-encoded image bytes>}``
    * ``another_teacher``: always ``None``
    * ``image``: always ``True``
    * ``date``: a fixed placeholder string

    The file is written as UTF-8 so that the non-ASCII placeholder text is
    stored identically on every platform; ``get_about_replacements`` reads it
    back with the same encoding.

    Raises:
        requests.exceptions.MissingSchema: If no image URL was found on the
            page (the same exception type ``requests`` raised when it was
            handed a missing URL).
        requests.exceptions.RequestException: On network failures, timeouts
            or non-success HTTP status codes.
    """
    # Local import: resolves relative image paths against the page URL.
    from urllib.parse import urljoin

    output = {
        "data": {},
        "another_teacher": None,
    }

    page = requests.get(
        config.parser.link, headers=headers, timeout=_REQUEST_TIMEOUT
    )
    # Fail loudly instead of parsing an error page.
    page.raise_for_status()
    page.encoding = 'utf-8'

    soup = BeautifulSoup(page.text, "lxml")

    # Ideally this should be rewritten...
    src = image_parser(soup)
    if not src:
        raise requests.exceptions.MissingSchema(
            "No image URL found on the parsed page"
        )
    # An absolute ``src`` is returned unchanged; a relative one is resolved
    # against the final (post-redirect) page URL.
    url = urljoin(page.url, src)

    with requests.get(
        url=url, allow_redirects=True, stream=True, timeout=_REQUEST_TIMEOUT
    ) as r:
        # Do not store an HTTP error body as if it were the image.
        r.raise_for_status()
        output['image'] = True
        # Placeholder: the date is not extracted when the source is an image.
        output['date'] = 'невозможно получить!'
        output['data']['all'] = base64.b64encode(r.content).decode('utf-8')

    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be explicit rather than the platform default.
    with open(config.data_file, 'w', encoding='utf-8') as f:
        json.dump(output, f, ensure_ascii=False)


def get_about_replacements() -> dict:
    """Load and return the JSON document stored in ``config.data_file``.

    Returns:
        The decoded JSON content of the data file (the structure written by
        ``docs_parse``).

    Raises:
        OSError: If the data file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(config.data_file, 'r', encoding='utf-8') as f:
        return json.load(f)
|
||||
|
||||
if __name__ == "__main__":
|
||||
docs_parse()
|
34
parser/utils.py
Normal file
34
parser/utils.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from typing import Any
|
||||
|
||||
def table_parser(soup: BeautifulSoup, output):
    """Fill ``output`` with the date and per-row texts scraped from ``soup``.

    The date is the text of the second ``<span style="color:black">`` inside
    ``<main>``. For every ``<tr>`` in the document, the first black span of
    the row is used as the key (spaces removed) and the black span inside the
    row's ``<td valign="top">`` as the value; both have non-breaking spaces
    stripped. Rows that lack either element (for example header or spacer
    rows) are skipped instead of aborting the whole parse.

    Args:
        soup: Parsed page.
        output: Mapping that is mutated in place; must already contain a
            ``"data"`` mapping.

    Returns:
        The same ``output`` object, with ``output["date"]`` set and one
        ``output["data"][key]`` entry per usable row.

    Raises:
        AttributeError: If the page has no ``<main>`` element.
        IndexError: If ``<main>`` holds fewer than two black spans.
    """
    # Date parser
    date = soup.find("main").find_all('span', style="color:black")[1]
    output["date"] = date.text.replace('\xa0', '')

    # Replacements parser
    for row in soup.find_all('tr'):
        cell = row.find("td", valign="top")
        text_span = (
            cell.find("span", style="color:black") if cell is not None else None
        )
        group_span = row.find("span", style="color:black")
        if text_span is None or group_span is None:
            # Row does not have the expected layout; nothing to record.
            continue

        group = group_span.text.replace(" ", "").replace('\xa0', '')
        output["data"][group] = text_span.text.replace('\xa0', '')

    return output
|
||||
|
||||
|
||||
def image_parser(soup: BeautifulSoup):
    """Return the ``src`` of the first PNG or JPG image inside ``<main>``.

    Extensions are tried in order (``png`` first, then ``jpg``), so a ``.png``
    image is preferred over a ``.jpg`` one regardless of their order in the
    document. Matching is done on the end of the ``src`` attribute.

    Args:
        soup: Parsed page.

    Returns:
        The ``src`` attribute value of the chosen ``<img>``, or ``None`` when
        the page has no ``<main>`` element or no matching image.
    """
    main = soup.find("main")
    if main is None:
        # No content container at all, so there is nothing to search.
        return None

    for ext in ('png', 'jpg'):
        match = main.select_one(f'img[src$=".{ext}"]')
        if match is not None:
            return match['src']

    return None
|
Reference in New Issue
Block a user