diff --git a/backup/google_parser/parser/parser.py b/backup/google_parser/parser/parser.py
index 7c728ce..dbed76e 100644
--- a/backup/google_parser/parser/parser.py
+++ b/backup/google_parser/parser/parser.py
@@ -67,11 +67,13 @@ def get_about_replacements() -> dict:
     try:
         count = document['body']["content"][element]["table"]["rows"]
     except (IndexError, KeyError):
-        element = helper.find_with_table(document)
-        if element:
-            count = document['body']["content"][element]["table"]["rows"]
-        else:
-            info = helper.find_with_text(document)
+        image, image_bytes = helper.find_image(document)
+        if not image:
+            element = helper.find_with_table(document)
+            if element:
+                count = document['body']["content"][element]["table"]["rows"]
+            else:
+                info = helper.find_with_text(document)
 
     date = helper.get_date(document)
 
@@ -101,6 +103,15 @@ def get_about_replacements() -> dict:
         )
     )
 
+    # NOTE(review): `image` is only assigned in the `except` branch
+    # above; confirm it is also bound when the `try` body succeeds.
+    if image:
+        return {
+            "image": image,
+            'date': date if type(date) != type(False) else "Error" ,
+            'data': {"all": image_bytes},
+            'another_teacher': another_teacher,
+        }
     return {
         'date': date if type(date) != type(False) else "Error" ,
         'data': dict(info),
diff --git a/backup/google_parser/parser/utils.py b/backup/google_parser/parser/utils.py
index ca47ff2..1240132 100644
--- a/backup/google_parser/parser/utils.py
+++ b/backup/google_parser/parser/utils.py
@@ -2,6 +2,8 @@ import os
 import datetime
 from datetime import datetime as dt
 
+import requests
+
 from load import config
 
 
@@ -186,3 +188,50 @@ class Helper():
                 return doc
 
             element += 1
+
+
+    @classmethod
+    def get_link_and_download(cls, id_doc, document):
+        """Open a download stream for one inline image of the document.
+
+        Looks up ``id_doc`` in ``document['inlineObjects']`` and requests
+        the image's ``contentUri``.
+
+        Returns the raw ``requests`` response stream (``Response.raw``),
+        or ``None`` when the document has no such inline object.
+        """
+        if ("inlineObjects" not in document
+                or id_doc not in document['inlineObjects']):
+            return None
+        link = (document
+                ['inlineObjects'][id_doc]['inlineObjectProperties']
+                ['embeddedObject']['imageProperties']['contentUri'])
+        # A timeout keeps a stalled download from hanging the parser.
+        r = requests.get(link, stream=True, timeout=30)
+        return r.raw
+
+    @classmethod
+    def find_image(cls, document):
+        """Find the first inline image in the document body.
+
+        Returns ``(True, stream)`` for the first paragraph element that
+        carries an ``inlineObjectElement``; ``stream`` is the result of
+        ``get_link_and_download`` and may be ``None``.  Returns
+        ``(False, None)`` when no paragraph holds an inline object.
+        """
+        for block in document['body']["content"]:
+            if "paragraph" not in block:
+                continue
+            elements = block["paragraph"].get("elements", [])
+            if isinstance(elements, dict):
+                # Tolerate a single element passed as a bare dict.
+                elements = [elements]
+            # "elements" is a list of paragraph elements: check each one,
+            # because testing the list itself for the key never matches.
+            for element in elements:
+                if "inlineObjectElement" in element:
+                    return True, cls.get_link_and_download(
+                        element['inlineObjectElement']['inlineObjectId'],
+                        document)
+        return False, None
+