replace-bot/parser/parser.py

124 lines
4.0 KiB
Python
Raw Permalink Normal View History

2023-09-04 23:34:52 +03:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
2022-02-16 18:13:44 +03:00
import json
2023-09-04 23:34:52 +03:00
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
2022-02-16 18:13:44 +03:00
2023-09-04 23:34:52 +03:00
from load import config
from .utils import Helper
2022-02-16 18:13:44 +03:00
2023-09-04 23:34:52 +03:00
# If modifying these scopes, delete the file token.json.
SCOPES = ['https://www.googleapis.com/auth/documents.readonly']
2022-02-16 18:13:44 +03:00
2023-09-04 23:34:52 +03:00
__all__ = ['docs_parse', 'get_about_replacements']
2022-02-16 18:13:44 +03:00
2022-02-22 15:29:44 +03:00
2023-09-04 23:34:52 +03:00
def docs_parse() -> None:
    """Download the configured Google Doc and dump it as JSON to disk.

    Obtains OAuth credentials, fetches the document identified by
    ``config.documentid`` through the Docs v1 API and writes the raw
    response to ``config.data_file``, replacing any previous copy.

    Side effects:
        * may rewrite ``config.token_file`` (when credentials were refreshed
          or newly obtained);
        * writes ``"1"`` to ``configs/temp.file``;
        * rewrites ``config.data_file``.
    """
    creds = _load_credentials()
    service = build('docs', 'v1', credentials=creds)

    # Retrieve the document's contents from the Docs service.
    document = service.documents().get(documentId=config.documentid).execute()

    # Drop the stale dump before writing the fresh one.
    if os.path.exists(config.data_file):
        os.remove(config.data_file)

    # NOTE(review): presumably a marker consumed elsewhere in the project --
    # its reader is not visible from this function; confirm before changing.
    with open("configs/temp.file", 'w') as f:
        f.write("1")

    # The context manager closes the file; no explicit close() is needed.
    with open(config.data_file, 'w') as f:
        json.dump(document, f, ensure_ascii=False)


def _load_credentials():
    """Return usable Google API credentials for ``SCOPES``.

    Reads cached credentials from ``config.token_file`` when that file
    exists. If they are missing or not valid they are refreshed (when expired
    and a refresh token is present) or re-created through the local-server
    OAuth flow based on ``config.credentials_file``; the resulting
    credentials are then written back to ``config.token_file``.
    """
    creds = None
    # The token file stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the
    # first time.
    if os.path.exists(config.token_file):
        creds = Credentials.from_authorized_user_file(
            config.token_file,
            SCOPES
        )

    if creds and creds.valid:
        return creds

    # No (valid) credentials available: refresh them or let the user log in.
    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file(
            config.credentials_file, SCOPES)
        creds = flow.run_local_server(port=0)

    # Save the credentials for the next run.
    with open(config.token_file, 'w') as token:
        token.write(creds.to_json())
    return creds
2023-09-04 23:34:52 +03:00
def read_parse_data() -> dict:
    """Load and return the JSON document stored in ``config.data_file``.

    Returns:
        The decoded JSON content of the file (the dump written by
        ``json.dump`` in this module).

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    # The context manager closes the file; no explicit close() is needed.
    with open(config.data_file, 'r') as f:
        return json.load(f)
2023-09-04 23:34:52 +03:00
def get_about_replacements() -> dict:
    """Extract replacement information from the stored document dump.

    The table is first looked up at the index reported by
    ``Helper.get_table_element()``. If that lookup fails, the fallbacks are
    tried in order: an image (``Helper.find_image``), a table located by
    ``Helper.find_with_table``, and finally ``Helper.find_with_text``.

    Returns:
        For an image document::

            {"image": ..., "date": "Error", "data": {"all": <image bytes>},
             "another_teacher": None}

        Otherwise::

            {"date": ..., "data": {<first cell text>: <second cell text>},
             "another_teacher": ...}

        ``date`` is replaced by ``"Error"`` whenever the helper returns a
        bool instead of a date value.
    """
    helper = Helper()
    document = read_parse_data()
    info = []
    # These must be bound on every path: previously they were only assigned
    # inside the except handler, so a successful table lookup crashed with
    # UnboundLocalError on the first `image` check.
    image, image_bytes = None, None
    count = 0
    element = helper.get_table_element()

    try:
        count = document['body']["content"][element]["table"]["rows"]
    except (IndexError, KeyError):
        # No table at the expected position: try the fallbacks.
        image, image_bytes = helper.find_image(document)
        if not image:
            element = helper.find_with_table(document)
            if element:
                count = document['body']["content"][element]["table"]["rows"]
            else:
                info = helper.find_with_text(document)

    if image:
        # Image documents carry no parsed date or teacher information.
        return {
            "image": image,
            'date': "Error",
            'data': {"all": image_bytes},
            'another_teacher': None,
        }

    date = helper.get_date(document)
    another_teacher = helper.teacher(document)

    if element:
        table = document['body']["content"][element]["table"]
        # Honour the row count reported by the table itself.
        for row in table["tableRows"][:count]:
            cells = row["tableCells"]
            # First column: text of the first paragraph only.
            key = _first_run_text(cells[0]["content"][0])
            # Second column: every paragraph of the cell, concatenated.
            replaces = "".join(
                _first_run_text(block) for block in cells[1]["content"]
            )
            info.append((key, replaces))

    return {
        'date': "Error" if isinstance(date, bool) else date,
        'data': dict(info),
        'another_teacher': another_teacher,
    }


def _first_run_text(block: dict) -> str:
    """Return the first text run of a paragraph block without trailing newlines."""
    return (block["paragraph"]["elements"][0]
            ["textRun"]["content"].rstrip("\n"))