From 2b1fdfc8a0ba5bf729e9346f1b2ddb633364ddd9 Mon Sep 17 00:00:00 2001 From: tema Date: Tue, 5 Sep 2023 00:52:55 +0300 Subject: [PATCH 1/3] Requirements update --- requirements.txt | 63 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 13 deletions(-) diff --git a/requirements.txt b/requirements.txt index 395417e..34511ac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,50 @@ -#google-api-python-client -#google-auth-httplib2 -#google-auth-oauthlib -bs4 -requests -GitPython -lxml -peewee -aiogram -cryptography -pymysqldb -psycopg2 -aioschedule +aiogram==2.25.1 +aiohttp==3.8.4 +aioschedule==0.5.2 +aiosignal==1.3.1 +async-timeout==4.0.2 +attrs==22.2.0 +Babel==2.9.1 +beautifulsoup4==4.11.2 +bs4==0.0.1 +cachetools==5.3.1 +certifi==2022.12.7 +cffi==1.15.1 +charset-normalizer==3.0.1 +cryptography==39.0.1 +easydict==1.10 +frozenlist==1.3.3 +gitdb==4.0.10 +GitPython==3.1.30 +google-api-core==2.11.1 +google-api-python-client==2.97.0 +google-auth==2.22.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==1.0.0 +googleapis-common-protos==1.60.0 +httplib2==0.22.0 +idna==3.4 +lxml==4.9.2 +magic-filter==1.0.9 +multidict==6.0.4 +oauthlib==3.2.2 +peewee==3.15.4 +protobuf==4.24.2 +psycopg2-binary==2.9.5 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 +pycparser==2.21 +PyMySQL==1.0.2 +PyMysqlDB==0.0.2 +pyparsing==3.1.1 +PySocks==1.7.1 +pytz==2022.7.1 +requests==2.31.0 +requests-oauthlib==1.3.1 +rsa==4.9 +six==1.16.0 +smmap==5.0.0 +soupsieve==2.3.2.post1 +uritemplate==4.1.1 +urllib3==1.26.14 +yarl==1.8.2 From a2477521faeb81a3ca60e38ddcd49694024e63b2 Mon Sep 17 00:00:00 2001 From: tema Date: Tue, 5 Sep 2023 00:54:53 +0300 Subject: [PATCH 2/3] Fix --- parser/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parser/utils.py b/parser/utils.py index ccb387c..e5ddfb3 100644 --- a/parser/utils.py +++ b/parser/utils.py @@ -205,9 +205,9 @@ class Helper(): def find_image(cls, document): for i in document['body']["content"]: if 
("paragraph" in i) and ("elements" in i["paragraph"]): - if "inlineObjectElement" in i["paragraph"]["elements"]: + if "inlineObjectElement" in i["paragraph"]["elements"][0]: return True, cls.get_link_and_download( - i["paragraph"]["elements"] + i["paragraph"]["elements"][0] ['inlineObjectElement']['inlineObjectId'], document) return False, None From 32b23273bd90a44ac7b2b964e923fdce56bd509f Mon Sep 17 00:00:00 2001 From: tema Date: Tue, 5 Sep 2023 13:05:40 +0300 Subject: [PATCH 3/3] Configuration modification and parser photo saver --- configs/configure.py | 43 ++++++++++++++++++++++++++++--------------- parser/parser.py | 3 +++ parser/utils.py | 11 ++++++++++- 3 files changed, 41 insertions(+), 16 deletions(-) diff --git a/configs/configure.py b/configs/configure.py index 37f296c..0fb4916 100644 --- a/configs/configure.py +++ b/configs/configure.py @@ -1,15 +1,25 @@ from configparser import ConfigParser -from easydict import EasyDict as edict +from typing import Any CONFIG_FILE = 'config.ini' -class Configure: +class Cfg: + def __getattr__(self, name: str) -> Any: + for key in self.data.keys(): + if name not in self.data[key]: + continue + return self.data[key][name] + raise NameError("Config options not found!") + + +class Configure(Cfg): def __init__(self): config = ConfigParser() config.read(CONFIG_FILE) + self.config = config self.data = dict() for section in config.sections(): @@ -18,17 +28,20 @@ class Configure: for key, value in config.items(section): self.data[section][key] = value - config_folder = config.get("Docs_Settings", "Config_folder").rstrip("/") - self.data["documentid"] = config.get("Docs_Settings", 'Document_ID') - self.data["data_file"] = config_folder + "/" + config.get("Docs_Settings", "data_file") - self.data["credentials_file"] = config_folder + "/" + config.get("Docs_Settings", "credentials_file") - self.data["token_file"] = self.config_folder + "/" + self.data['Docs_Settings']['token_file'] + self.config_folder = 
config.get("Docs_Settings", "Config_folder").rstrip("/") - def __getattr__(self, name): - if name in ["documentid", "data_file", "credentials_file", "token_file"]: - return self.data[name] - for key in self.data.keys(): - if name not in self.data[key]: - continue - return self.data[key][name] -# raise NameError("Config options not found!") + @property + def documentid(self): + return self.config.get("Docs_Settings", 'Document_ID') + + @property + def data_file(self): + return self.config_folder + "/" + self.config.get("Docs_Settings", "data_file") + + @property + def credentials_file(self): + return self.config_folder + "/" + self.config.get("Docs_Settings", "credentials_file") + + @property + def token_file(self): + return self.config_folder + "/" + self.data['Docs_Settings']['token_file'] \ No newline at end of file diff --git a/parser/parser.py b/parser/parser.py index e10656e..1d86b2d 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -45,6 +45,9 @@ def docs_parse() -> None: document = service.documents().get(documentId=config.documentid).execute() if os.path.exists(config.data_file): os.remove(config.data_file) + + with open("configs/temp.file", 'w') as f: + f.write("1") with open(config.data_file, 'w') as f: json.dump(document, f, ensure_ascii=False) diff --git a/parser/utils.py b/parser/utils.py index e5ddfb3..cb59a8e 100644 --- a/parser/utils.py +++ b/parser/utils.py @@ -193,13 +193,22 @@ class Helper(): @classmethod def get_link_and_download(cls, id_doc, document): + with open("configs/temp.file") as f: + data = f.read() + if data == "0": + return open("configs/photo.base64", 'rb').read() if "inlineObjects" in document: if id_doc in document['inlineObjects']: link = (document ['inlineObjects'][id_doc]['inlineObjectProperties'] ['embeddedObject']['imageProperties']['contentUri']) r = requests.get(link, stream=True) - return base64.b64encode(r.content).decode('utf-8') + photo = base64.b64encode(r.content).decode('utf-8') + with 
open("configs/photo.base64", 'w') as f: + f.write(photo) + with open("configs/temp.file", 'w') as f: + f.write("0") + return photo @classmethod def find_image(cls, document):