Update parser
This commit is contained in:
parent
478460525e
commit
8754cf841e
@ -67,6 +67,8 @@ def get_about_replacements() -> dict:
|
|||||||
try:
|
try:
|
||||||
count = document['body']["content"][element]["table"]["rows"]
|
count = document['body']["content"][element]["table"]["rows"]
|
||||||
except (IndexError, KeyError):
|
except (IndexError, KeyError):
|
||||||
|
image, image_bytes = helper.find_image(document)
|
||||||
|
if not image:
|
||||||
element = helper.find_with_table(document)
|
element = helper.find_with_table(document)
|
||||||
if element:
|
if element:
|
||||||
count = document['body']["content"][element]["table"]["rows"]
|
count = document['body']["content"][element]["table"]["rows"]
|
||||||
@ -101,6 +103,13 @@ def get_about_replacements() -> dict:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if image:
|
||||||
|
return {
|
||||||
|
"image": image,
|
||||||
|
'date': date if type(date) != type(False) else "Error" ,
|
||||||
|
'data': {"all": image_bytes},
|
||||||
|
'another_teacher': another_teacher,
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
'date': date if type(date) != type(False) else "Error" ,
|
'date': date if type(date) != type(False) else "Error" ,
|
||||||
'data': dict(info),
|
'data': dict(info),
|
||||||
|
@ -2,6 +2,8 @@ import os
|
|||||||
import datetime
|
import datetime
|
||||||
from datetime import datetime as dt
|
from datetime import datetime as dt
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
from load import config
|
from load import config
|
||||||
|
|
||||||
|
|
||||||
@ -186,3 +188,25 @@ class Helper():
|
|||||||
return doc
|
return doc
|
||||||
|
|
||||||
element += 1
|
element += 1
|
||||||
|
|
||||||
|
|
||||||
|
@classmethod
def get_link_and_download(cls, id_doc, document):
    """Open a download stream for an inline image referenced by a document.

    Follows ``document['inlineObjects'][id_doc]['inlineObjectProperties']
    ['embeddedObject']['imageProperties']['contentUri']`` to get the image
    link and issues a streamed GET request for it.

    Args:
        id_doc: Key of the inline object inside ``document['inlineObjects']``.
        document: Document mapping that may contain ``inlineObjects``.

    Returns:
        The raw body stream (``Response.raw``) of the streamed request, or
        ``None`` when the document has no such inline object or the object
        carries no ``contentUri``. The caller is responsible for reading
        the stream.
    """
    try:
        link = (document['inlineObjects'][id_doc]['inlineObjectProperties']
                ['embeddedObject']['imageProperties']['contentUri'])
    except (KeyError, TypeError):
        # Unknown id, or an inline object without an image link:
        # there is nothing to download.
        return None

    # Without a timeout ``requests`` may wait on an unresponsive host
    # indefinitely; bound the wait so the parser cannot hang.
    response = requests.get(link, stream=True, timeout=30)
    return response.raw
|
||||||
|
|
||||||
|
@classmethod
def find_image(cls, document):
    """Find the first inline image in the document body and download it.

    Walks ``document['body']['content']`` and inspects the ``elements`` of
    every paragraph for an ``inlineObjectElement`` entry. For each one found,
    its ``inlineObjectId`` is passed to ``cls.get_link_and_download``.

    Args:
        document: Document mapping with a ``body.content`` list.

    Returns:
        ``(True, stream)`` for the first inline object whose download
        produced a stream, otherwise ``(False, None)``.
    """
    for block in document['body']["content"]:
        if "paragraph" not in block or "elements" not in block["paragraph"]:
            continue

        elements = block["paragraph"]["elements"]
        # ``elements`` is normally a list of element dicts, so a plain
        # ``"inlineObjectElement" in elements`` test never matches.
        # A single mapping is still accepted for backward compatibility.
        if isinstance(elements, dict):
            elements = [elements]

        for element in elements:
            if not isinstance(element, dict):
                continue
            if "inlineObjectElement" not in element:
                continue

            stream = cls.get_link_and_download(
                element['inlineObjectElement']['inlineObjectId'], document)
            # Only report success when a stream was actually obtained;
            # otherwise keep looking for another usable image.
            if stream is not None:
                return True, stream

    return False, None
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user