replace-bot/parser/parser.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import json

from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials

from load import config
from .utils import Helper

# If modifying these scopes, delete the file token.json.
SCOPES = ['https://www.googleapis.com/auth/documents.readonly']

__all__ = ['docs_parse', 'get_about_replacements']


def docs_parse() -> None:
    """Fetch the configured Google Doc and store it on disk as JSON.

    Credentials are loaded from ``config.token_file`` when that file exists.
    If they are missing or invalid they are either refreshed (expired token
    with a refresh token available) or obtained through the local-server
    OAuth flow using ``config.credentials_file``; the resulting token is then
    written back to ``config.token_file``.

    The document identified by ``config.documentid`` is downloaded through the
    Docs v1 API and dumped to ``config.data_file``. Before that, the file
    ``configs/temp.file`` is (re)written with the content ``"1"``.
    """
    credentials = None
    # token.json holds the user's access/refresh tokens; it is created
    # automatically the first time the authorization flow completes.
    if os.path.exists(config.token_file):
        credentials = Credentials.from_authorized_user_file(
            config.token_file, SCOPES
        )

    needs_authorization = not credentials or not credentials.valid
    if needs_authorization:
        refreshable = (
            credentials and credentials.expired and credentials.refresh_token
        )
        if refreshable:
            credentials.refresh(Request())
        else:
            # No usable token at all: let the user log in interactively.
            oauth_flow = InstalledAppFlow.from_client_secrets_file(
                config.credentials_file, SCOPES
            )
            credentials = oauth_flow.run_local_server(port=0)
        # Persist the (new or refreshed) credentials for the next run.
        with open(config.token_file, 'w') as token_handle:
            token_handle.write(credentials.to_json())

    docs_service = build('docs', 'v1', credentials=credentials)

    # Retrieve the document contents from the Docs service.
    fetch_request = docs_service.documents().get(documentId=config.documentid)
    payload = fetch_request.execute()

    # Drop any previous dump before writing the fresh one.
    if os.path.exists(config.data_file):
        os.remove(config.data_file)

    # NOTE(review): presumably a marker read by another part of the project
    # to signal that fresh data was fetched - confirm against its consumers.
    with open("configs/temp.file", 'w') as marker_handle:
        marker_handle.write("1")

    with open(config.data_file, 'w') as dump_handle:
        json.dump(payload, dump_handle, ensure_ascii=False)


def read_parse_data():
    """Load and return the JSON document stored in ``config.data_file``."""
    with open(config.data_file, 'r') as dump_handle:
        # The context manager closes the file; no explicit close() is needed.
        return json.load(dump_handle)


def _paragraph_text(block: dict) -> str:
    """Return the first text run of a paragraph block, trailing newlines removed."""
    return (
        block["paragraph"]["elements"][0]["textRun"]["content"].rstrip("\n")
    )


def get_about_replacements() -> dict:
    """Build the replacements summary from the stored document dump.

    The document is read via ``read_parse_data()``. The table is first looked
    up at the index returned by ``Helper.get_table_element()``; if that lookup
    fails with ``IndexError``/``KeyError`` the function falls back, in order,
    to ``Helper.find_image``, ``Helper.find_with_table`` and
    ``Helper.find_with_text``.

    Returns:
        A dict with the keys ``'date'``, ``'data'`` and ``'another_teacher'``.
        When an image was found the dict additionally contains ``'image'``,
        ``'data'`` is ``{"all": image_bytes}`` and ``'date'`` is ``"Error"``.
        Otherwise ``'data'`` is ``dict(info)``, where ``info`` holds one
        ``(first-column text, second-column text)`` pair per table row, or
        whatever ``Helper.find_with_text`` returned when no table was found.
        ``'date'`` is ``"Error"`` whenever the helper returned a bool.
    """
    helper = Helper()
    document = read_parse_data()
    info = []
    element = helper.get_table_element()

    # Bind these before the try block: they used to be assigned only in the
    # except branch, so a successful lookup crashed with UnboundLocalError
    # at the first ``if not image`` check below.
    image, image_bytes = None, None
    count = 0

    try:
        count = document['body']["content"][element]["table"]["rows"]
    except (IndexError, KeyError):
        image, image_bytes = helper.find_image(document)
        if not image:
            element = helper.find_with_table(document)
            if element:
                count = document['body']["content"][element]["table"]["rows"]
            else:
                info = helper.find_with_text(document)

    if not image:
        date = helper.get_date(document)
        another_teacher = helper.teacher(document)
    else:
        date, another_teacher = False, None

    if element and not image:
        table = document['body']["content"][element]["table"]
        for row_index in range(count):
            cells = table["tableRows"][row_index]["tableCells"]
            # The second column may span several paragraphs; concatenate them.
            replaces = "".join(
                _paragraph_text(block) for block in cells[1]["content"]
            )
            info.append((_paragraph_text(cells[0]["content"][0]), replaces))

    # A bool date (e.g. the False placeholder above) means no usable date.
    shown_date = "Error" if isinstance(date, bool) else date

    if image:
        return {
            "image": image,
            'date': shown_date,
            'data': {"all": image_bytes},
            'another_teacher': another_teacher,
        }
    return {
        'date': shown_date,
        'data': dict(info),
        'another_teacher': another_teacher,
    }
G parser 2023-09-04 23:34:52 +03:00			`#!/usr/bin/env python3`
			`# -- coding: utf-8 --`
			`import os`
Init commit 2022-02-16 18:13:44 +03:00			`import json`

G parser 2023-09-04 23:34:52 +03:00			`from googleapiclient.discovery import build`
			`from google_auth_oauthlib.flow import InstalledAppFlow`
			`from google.auth.transport.requests import Request`
			`from google.oauth2.credentials import Credentials`
Init commit 2022-02-16 18:13:44 +03:00
G parser 2023-09-04 23:34:52 +03:00			`from load import config`
			`from .utils import Helper`
Init commit 2022-02-16 18:13:44 +03:00
G parser 2023-09-04 23:34:52 +03:00			`# If modifying these scopes, delete the file token.json.`
			`SCOPES = ['https://www.googleapis.com/auth/documents.readonly']`
Init commit 2022-02-16 18:13:44 +03:00
G parser 2023-09-04 23:34:52 +03:00			`__all__ = ['docs_parse', 'get_about_replacements']`
Init commit 2022-02-16 18:13:44 +03:00
Доработан парсер 2022-02-22 15:29:44 +03:00
G parser 2023-09-04 23:34:52 +03:00			`def docs_parse() -> None:`
			`creds = None`
			`# The file token.json stores the user's access and refresh tokens, and is`
			`# created automatically when the authorization flow completes for the first`
			`# time.`
			`if os.path.exists(config.token_file):`
			`creds = Credentials.from_authorized_user_file(`
			`config.token_file,`
			`SCOPES`
			`)`
			`# If there are no (valid) credentials available, let the user log in.`
			`if not creds or not creds.valid:`
			`if creds and creds.expired and creds.refresh_token:`
			`creds.refresh(Request())`
			`else:`
			`flow = InstalledAppFlow.from_client_secrets_file(`
			`config.credentials_file, SCOPES)`
			`creds = flow.run_local_server(port=0)`
			`# Save the credentials for the next run`
			`with open(config.token_file, 'w') as token:`
			`token.write(creds.to_json())`

			`service = build('docs', 'v1', credentials=creds)`

			`# Retrieve the documents contents from the Docs service.`
			`document = service.documents().get(documentId=config.documentid).execute()`
			`if os.path.exists(config.data_file):`
			`os.remove(config.data_file)`
Configuration modifcation and parser photo saver 2023-09-05 13:05:40 +03:00
			`with open("configs/temp.file", 'w') as f:`
			`f.write("1")`
Init commit 2022-02-16 18:13:44 +03:00
			`with open(config.data_file, 'w') as f:`
G parser 2023-09-04 23:34:52 +03:00			`json.dump(document, f, ensure_ascii=False)`
Init commit 2022-02-16 18:13:44 +03:00			`f.close()`


G parser 2023-09-04 23:34:52 +03:00			`def read_parse_data():`
Init commit 2022-02-16 18:13:44 +03:00			`with open(config.data_file, 'r') as f:`
			`data = json.loads(f.read())`
			`f.close()`
			`return data`
G parser 2023-09-04 23:34:52 +03:00

			`def get_about_replacements() -> dict:`
			`helper = Helper()`
			`document = read_parse_data()`
			`info = []`
			`element = helper.get_table_element()`

			`try:`
			`count = document['body']["content"][element]["table"]["rows"]`
			`except (IndexError, KeyError):`
			`image, image_bytes = helper.find_image(document)`
			`if not image:`
			`element = helper.find_with_table(document)`
			`if element:`
			`count = document['body']["content"][element]["table"]["rows"]`
			`else:`
			`info = helper.find_with_text(document)`

			`if not image:`
			`date = helper.get_date(document)`

			`another_teacher = helper.teacher(document)`
			`else:`
			`date, another_teacher = False, None`

			`if element and (not image):`
			`for c in range(0, count):`
			`more_replaces = (document['body']`
			`["content"][element]["table"]`
			`["tableRows"][c]["tableCells"][1]`
			`["content"]`
			`)`
			`replaces = ''`
			`for i in range(0, len(more_replaces)):`
			`replaces += (document['body']["content"][element]["table"]`
			`["tableRows"][c]["tableCells"][1]`
			`["content"][i]["paragraph"]["elements"][0]`
			`["textRun"]["content"].rstrip("\n"))`

			`info.append(`
			`(`
			`document['body']["content"][element]["table"]`
			`["tableRows"][c]["tableCells"][0]`
			`["content"][0]["paragraph"]["elements"][0]`
			`["textRun"]["content"].rstrip("\n"),`
			`replaces`
			`)`
			`)`

			`if image:`
			`return {`
			`"image": image,`
			`'date': date if type(date) != type(False) else "Error" ,`
			`'data': {"all": image_bytes},`
			`'another_teacher': another_teacher,`
			`}`
			`return {`
			`'date': date if type(date) != type(False) else "Error" ,`
			`'data': dict(info),`
			`'another_teacher': another_teacher,`
			`}`