replace-bot/parser/utils.py

214 lines
6.5 KiB
Python
Raw Normal View History

2023-09-04 23:34:52 +03:00
import os
import datetime
from datetime import datetime as dt
2023-09-05 00:18:35 +03:00
import base64
2022-02-16 18:13:44 +03:00
2023-09-04 23:34:52 +03:00
import requests
2022-02-16 18:13:44 +03:00
2023-09-04 23:34:52 +03:00
from load import config
2022-02-16 18:13:44 +03:00
2023-09-04 23:34:52 +03:00
def date_parser_helper(days: int, parse: str = "%d.%m.20%y"):
    """Return the current local date shifted by ``days`` days, as a string.

    ``days`` may be negative or zero. ``parse`` is the ``strftime`` pattern
    applied to the shifted date; the default renders day.month.year with a
    literal ``20`` in front of the two-digit year.
    """
    shifted = dt.now() + datetime.timedelta(days=days)
    return shifted.strftime(parse)
2023-09-04 23:34:52 +03:00
'''
self.months = {
1: "січень",
2: "лютий",
3: "березень",
4: "квітень",
5: "травень",
6: "червень",
7: "липень",
8: "серпень",
9: "вересень",
10: "жовтень",
11: "листопад",
12: "грудень"
}
'''
class Helper():
    """Helpers for pulling schedule data out of a parsed document dict.

    Every method reads the nested layout
    ``document['body']['content'][i]['paragraph']['elements'][j]['textRun']['content']``
    (presumably a Google Docs API document resource -- confirm against the
    caller that fetches the document).
    """

    # Seconds to wait for the image host before giving up on a download.
    _DOWNLOAD_TIMEOUT = 30

    def __init__(self):
        # Formatted dates for today and the next three days; they are used
        # both to recognise the heading line and to skip lines when parsing.
        self.date_now = date_parser_helper(0)
        self.date_next = date_parser_helper(1)
        self.weekend_pass = date_parser_helper(2)
        self.two_day_pass = date_parser_helper(3)
        # Substrings that disqualify a line in find_with_text().
        self.black_list = [
            'черговий викладач',
            self.date_now,
            self.date_next,
            self.weekend_pass,
            self.two_day_pass,
        ]

    @staticmethod
    def _first_run_text(document, index):
        """Return the text of the first run of ``content[index]``, or None.

        None is returned when the index is out of range, the element is not
        a paragraph, the paragraph has no elements, or the first element
        carries no string ``textRun`` content.
        """
        try:
            text = (
                document['body']["content"][index]
                ["paragraph"]["elements"][0]["textRun"]["content"]
            )
        except (KeyError, IndexError, TypeError):
            return None
        return text if isinstance(text, str) else None

    @staticmethod
    def find_with_table(document):
        """Locate the first table among content elements 2..15.

        On success the index is written to ``table_element.txt`` inside
        ``config.config_folder`` and returned. ``False`` is returned when
        the content list ends first or none of the elements has table rows.
        """
        for c_element in range(2, 16):
            try:
                document['body']["content"][c_element]["table"]["rows"]
            except KeyError:
                # Not a table (or a key is missing): try the next element.
                continue
            except IndexError:
                # Ran past the end of the content list.
                return False
            break
        else:
            return False
        # The `with` block closes the file; no explicit close() is needed.
        with open(f"{config.config_folder}/table_element.txt", 'w') as f:
            f.write(str(c_element))
        return c_element

    def find_with_text(self, document):
        """Parse "group - text" lines from content elements 4..14.

        A line is considered only if it contains a dash and none of the
        ``self.black_list`` substrings (compared against the lower-cased
        line). It is split on the dash; when that does not give exactly two
        parts, a tab split is tried, but only for elements up to index 6.
        Past index 6 the first line that fails the dash split ends the scan.

        Returns a list of ``(group, text)`` tuples; pairs with empty text
        are dropped. Unreadable elements are skipped.

        NOTE(review): the original also called ``.replace("", "-", 1)`` on
        each line. With an empty search string that prepends a dash to every
        line, so a regular "group - text" line could never split in two. The
        search string was probably a character lost from the file; restore
        the substitution here if that character is known.
        """
        format_charset = '-'
        alternative_format_charset = "\t"
        data = []
        for element in range(4, 15):
            raw = self._first_run_text(document, element)
            if raw is None:
                continue
            doc = raw.rstrip("\n")
            if format_charset not in doc:
                continue
            lowered = doc.lower()
            # Every black-list entry must be absent, not just the first one.
            if any(p in lowered for p in self.black_list):
                continue
            parts = doc.split(format_charset)
            if len(parts) != 2:
                if element > 6:
                    break
                parts = doc.split(alternative_format_charset)
                if len(parts) != 2:
                    # Nothing parsed from this line; never reuse an earlier
                    # line's values.
                    continue
            group, text = parts
            if text != '':
                data.append(
                    (group.strip(" "), text.lstrip(" ").replace("\t", ""))
                )
        return data

    def get_date(self, document):
        """Return the heading line that carries the schedule date.

        Content elements 1..15 are scanned. The first one whose text
        contains ``"заміни на"`` (case-insensitive), or contains one of the
        four pre-computed dates once spaces are removed, is returned with
        trailing spaces and newlines stripped. ``False`` is returned when
        nothing matches. Unreadable elements are skipped.
        """
        wanted = (
            self.date_now,
            self.date_next,
            self.weekend_pass,
            self.two_day_pass,
        )
        for date_element in range(1, 16):
            raw = self._first_run_text(document, date_element)
            if raw is None:
                continue
            date = raw.rstrip(" \n")
            lowered = date.lower()
            # The former .lstrip("заміни").lstrip("на") calls stripped
            # character sets, not prefixes; they could only remove leading
            # letters and so never changed whether a date substring matched.
            compact = lowered.replace(" ", "")
            if "заміни на" in lowered or any(d in compact for d in wanted):
                return date
        return False

    @staticmethod
    def get_table_element():
        """Return the table index saved by find_with_table(), or 6.

        The value is read from ``table_element.txt`` inside
        ``config.config_folder``; 6 is returned when the file is missing.
        Raises ``ValueError`` if the file content is not an integer.
        """
        try:
            with open(f"{config.config_folder}/table_element.txt", 'r') as f:
                return int(f.read())
        except FileNotFoundError:
            return 6

    @staticmethod
    def teacher(document):
        """Return the text run that mentions 'черговий викладач', or None.

        Every text run of the paragraphs at content indexes 1..5 is checked
        in order, case-insensitively. The matching run is returned with
        trailing newlines stripped.

        NOTE(review): the original applied ``.replace("", "")`` before the
        comparison, which is a no-op as written; if a character was lost
        from that literal, restore the substitution here.
        """
        content = document['body']["content"]
        for element in range(1, 6):
            try:
                block = content[element]
            except IndexError:
                # Document is shorter than the scanned range.
                break
            if "paragraph" not in block:
                continue
            for run in block["paragraph"].get("elements", []):
                text = run.get("textRun", {}).get("content")
                if not isinstance(text, str):
                    continue
                doc = text.rstrip("\n")
                if 'черговий викладач' in doc.lower():
                    return doc
        return None

    @classmethod
    def get_link_and_download(cls, id_doc, document):
        """Download the inline object ``id_doc`` and return it base64-encoded.

        The URL is read from
        ``document['inlineObjects'][id_doc]['inlineObjectProperties']
        ['embeddedObject']['imageProperties']['contentUri']``.
        Returns ``None`` when that path is missing or the HTTP response is
        not successful. Exceptions raised by ``requests`` propagate.
        """
        try:
            link = (
                document['inlineObjects'][id_doc]['inlineObjectProperties']
                ['embeddedObject']['imageProperties']['contentUri']
            )
        except (KeyError, TypeError):
            return None
        # A timeout keeps an unresponsive host from blocking forever.
        r = requests.get(link, timeout=cls._DOWNLOAD_TIMEOUT)
        if not r.ok:
            # Do not encode an error page as if it were the image.
            return None
        return base64.b64encode(r.content).decode('utf-8')

    @classmethod
    def find_image(cls, document):
        """Find the first inline object in the document body.

        Returns ``(True, data)`` for the first paragraph element that has an
        ``inlineObjectElement`` with an ``inlineObjectId``, where ``data``
        is the result of ``get_link_and_download`` (possibly ``None``).
        Returns ``(False, None)`` when there is no such element.
        """
        for block in document['body']["content"]:
            if "paragraph" not in block:
                continue
            # "elements" is a list of dicts, so each entry is inspected
            # rather than testing membership on the list itself.
            for run in block["paragraph"].get("elements", []):
                inline = run.get("inlineObjectElement")
                if inline and "inlineObjectId" in inline:
                    return True, cls.get_link_and_download(
                        inline['inlineObjectId'], document)
        return False, None