replace-bot/parser/utils.py
2023-09-05 00:54:53 +03:00

214 lines
6.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import datetime
from datetime import datetime as dt
import base64
import requests
from load import config
def date_parser_helper(days: int, parse: str = "%d.%m.20%y"):
    """Format the local date that lies ``days`` days away from now.

    Args:
        days: Offset in days relative to the current local time; passed
            straight to ``datetime.timedelta`` (so negative values go back).
        parse: ``strftime`` format string. The default renders
            ``DD.MM.20YY`` (a literal ``20`` followed by the two-digit year).

    Returns:
        The shifted timestamp rendered with ``parse``.
    """
    shifted = dt.now() + datetime.timedelta(days=days)
    return shifted.strftime(parse)
# Disabled lookup table (month number -> Ukrainian month name) kept as a bare
# string literal. As an expression statement it has no effect at runtime, and
# nothing in the visible part of this file reads ``months``.
'''
self.months = {
1: "січень",
2: "лютий",
3: "березень",
4: "квітень",
5: "травень",
6: "червень",
7: "липень",
8: "серпень",
9: "вересень",
10: "жовтень",
11: "листопад",
12: "грудень"
}
'''
class Helper:
    """Helpers that pull replacement-schedule data out of a document dict.

    The document is read through keys such as ``body -> content ->
    paragraph -> elements -> textRun -> content``, ``table -> rows`` and
    ``inlineObjects``.
    NOTE(review): this looks like the Google Docs API document layout --
    confirm against the code that fetches the document.

    Instances snapshot four formatted dates (today and the next three days)
    when they are created; those strings drive both the date-header lookup
    and the blacklist used while parsing plain-text rows.
    """

    # Label of the "teacher on duty" line; always compared with lowercased text.
    _DUTY_TEACHER = 'черговий викладач'

    # NOTE(review): the reviewed copy of this file showed *empty* string
    # literals where a character was being replaced (the hosting page warned
    # about ambiguous Unicode characters). The two tables below are a
    # best-effort reconstruction of what was presumably meant -- verify the
    # exact characters against the raw file.
    # Dash look-alikes (en dash, em dash, minus sign) treated as "-".
    _DASH_LOOKALIKES = "\u2013\u2014\u2212"
    # Invisible characters dropped before matching the duty-teacher label.
    _INVISIBLE = dict.fromkeys(
        map(ord, "\u200b\u200c\u200d\u2060\ufeff\u00ad")
    )

    # Seconds to wait for the image server before giving up.
    _DOWNLOAD_TIMEOUT = 30

    def __init__(self):
        # Formatted with date_parser_helper's default format (DD.MM.20YY).
        self.date_now = date_parser_helper(0)       # today
        self.date_next = date_parser_helper(1)      # today + 1 day
        self.weekend_pass = date_parser_helper(2)   # today + 2 days
        self.two_day_pass = date_parser_helper(3)   # today + 3 days
        # Lines containing any of these are not replacement rows.
        self.black_list = [
            self._DUTY_TEACHER,
            self.date_now,
            self.date_next,
            self.weekend_pass,
            self.two_day_pass,
        ]

    # ------------------------------------------------------------------ #
    # private helpers
    # ------------------------------------------------------------------ #
    @staticmethod
    def _table_element_path():
        """Return the path of the file caching the table's content index."""
        return f"{config.config_folder}/table_element.txt"

    @staticmethod
    def _first_run_text(document, index):
        """Return the first text run of content element ``index``.

        Returns ``None`` when the element does not exist, is not a paragraph,
        has no elements, or its first element is not a text run.
        """
        try:
            text = (
                document['body']["content"][index]
                ["paragraph"]["elements"][0]["textRun"]["content"]
            )
        except (KeyError, IndexError, TypeError):
            return None
        return text if isinstance(text, str) else None

    @classmethod
    def _normalize_separator(cls, line):
        """Replace the first dash look-alike in ``line`` with ASCII ``-``."""
        for position, char in enumerate(line):
            if char in cls._DASH_LOOKALIKES:
                return line[:position] + "-" + line[position + 1:]
        return line

    # ------------------------------------------------------------------ #
    # public API
    # ------------------------------------------------------------------ #
    @staticmethod
    def find_with_table(document):
        """Locate the first content element (indices 2..15) holding a table.

        On success the index is written to ``table_element.txt`` inside
        ``config.config_folder`` and returned.

        Returns:
            The content index of the table, or ``False`` when the document
            ends before a table is found or none exists up to index 15.
        """
        for index in range(2, 16):
            try:
                document['body']["content"][index]["table"]["rows"]
            except KeyError:
                # Not a table (or no rows) -- try the next element.
                continue
            except IndexError:
                # Ran past the end of the document.
                return False
            # The context manager closes the file; no explicit close needed.
            with open(Helper._table_element_path(), 'w') as f:
                f.write(str(index))
            return index
        return False

    def find_with_text(self, document):
        """Parse ``group - text`` rows from content elements 4..14.

        A row is the first text run of a paragraph. It is considered only if
        it contains ``-`` (after normalising one dash look-alike) and none of
        the ``black_list`` entries. The row is split on ``-``; if that does
        not give exactly two parts, rows at index <= 6 are retried with a tab
        separator, while at index > 6 parsing stops. Rows with empty text are
        dropped.

        Elements that are missing or are not text paragraphs are skipped, so
        a document shorter than 15 elements returns what was collected.

        Returns:
            A list of ``(group, text)`` tuples in document order.
        """
        format_charset = '-'
        alternative_format_charset = "\t"
        data = []
        for element in range(4, 15):
            raw = self._first_run_text(document, element)
            if raw is None:
                continue
            line = self._normalize_separator(raw.rstrip("\n"))
            if format_charset not in line:
                continue
            lowered = line.lower()
            # Every blacklist entry must be absent, not just the first one.
            if any(entry in lowered for entry in self.black_list):
                continue

            parts = line.split(format_charset)
            if len(parts) != 2:
                if element > 6:
                    # Past the header area a malformed row ends the section.
                    break
                parts = line.split(alternative_format_charset)
                if len(parts) != 2:
                    # Unparseable row: skip it instead of re-using the
                    # values left over from the previous row.
                    continue

            group, text = parts
            if text != '':
                data.append(
                    (group.strip(" "), text.lstrip(" ").replace("\t", ""))
                )
        return data

    def get_date(self, document):
        """Find the date header among content elements 1..15.

        An element matches when its lowercased text contains ``"заміни на"``
        or, with spaces removed, contains one of the four dates stored on the
        instance. Elements that are missing or are not text paragraphs are
        skipped.

        Returns:
            The matching text with trailing spaces/newlines removed, or
            ``False`` when nothing matches.
        """
        known_dates = (
            self.date_now,
            self.date_next,
            self.weekend_pass,
            self.two_day_pass,
        )
        for date_element in range(1, 16):
            raw = self._first_run_text(document, date_element)
            if raw is None:
                continue
            date = raw.rstrip(" \n")
            lowered = date.lower()
            # Substring search makes prefix stripping unnecessary; spaces are
            # removed so dates typed like "05. 09. 2023" still match.
            compact = lowered.replace(" ", "")
            if "заміни на" in lowered or any(
                known in compact for known in known_dates
            ):
                return date
        return False

    @staticmethod
    def get_table_element():
        """Return the cached table index, or ``6`` when no cache file exists.

        Raises:
            ValueError: If the cache file does not contain an integer.
        """
        path = Helper._table_element_path()
        if os.path.exists(path):
            # Context manager so the handle is closed deterministically.
            with open(path, 'r') as f:
                return int(f.read())
        return 6

    @staticmethod
    def teacher(document):
        """Return the "teacher on duty" line from content elements 1..5.

        Every text run of every paragraph in that range is checked; the first
        run whose lowercased text contains ``'черговий викладач'`` is returned
        with trailing newlines removed.

        Returns:
            The matching text, or ``None`` when no run matches.
        """
        for block in document['body']["content"][1:6]:
            if "paragraph" not in block:
                continue
            for run in block["paragraph"].get("elements", []):
                text = run.get("textRun", {}).get("content")
                if not isinstance(text, str):
                    # e.g. an inline image inside the paragraph
                    continue
                text = text.rstrip("\n")
                probe = text.lower().translate(Helper._INVISIBLE)
                if Helper._DUTY_TEACHER in probe:
                    return text
        return None

    @classmethod
    def get_link_and_download(cls, id_doc, document):
        """Download an inline image and return it base64-encoded.

        Args:
            id_doc: Key of the object inside ``document['inlineObjects']``.
            document: The document dict.

        Returns:
            The image bytes as a base64 ``str``; ``None`` when the document
            has no such inline object or the server answers with an error
            status (so an error page is never encoded as if it were an image).

        Raises:
            KeyError: If the inline object lacks the expected nested keys.
            requests.RequestException: On connection failure or timeout.
        """
        if "inlineObjects" not in document:
            return None
        if id_doc not in document['inlineObjects']:
            return None
        link = (
            document['inlineObjects'][id_doc]['inlineObjectProperties']
            ['embeddedObject']['imageProperties']['contentUri']
        )
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(link, timeout=cls._DOWNLOAD_TIMEOUT)
        if not response.ok:
            return None
        return base64.b64encode(response.content).decode('utf-8')

    @classmethod
    def find_image(cls, document):
        """Look for a paragraph whose first element is an inline image.

        Returns:
            ``(True, payload)`` for the first such paragraph, where
            ``payload`` is the result of ``get_link_and_download`` (possibly
            ``None``); ``(False, None)`` when there is none.
        """
        for block in document['body']["content"]:
            if "paragraph" not in block:
                continue
            elements = block["paragraph"].get("elements")
            if not elements:
                # Missing or empty element list -- nothing to inspect.
                continue
            first = elements[0]
            if "inlineObjectElement" in first:
                return True, cls.get_link_and_download(
                    first['inlineObjectElement']['inlineObjectId'], document
                )
        return False, None