replace-bot/website-parser/utils.py

35 lines
923 B
Python
Raw Permalink Normal View History

2023-09-04 23:34:52 +03:00
from bs4 import BeautifulSoup
from typing import Any
def table_parser(soup: BeautifulSoup, output):
#Date parser
date = (soup.find("main").findAll('span', style="color:black"))[1]
output["date"] = date.text.replace(u'\xa0', u'')
#Replaces parser
replaces = soup.findAll('tr')
for data in replaces:
text = (
data.find("td", valign="top")
.find("span", style="color:black")
.text.replace(u'\xa0', u'')
)
group = (
data.find("span", style="color:black")
.text.replace(" ", "").replace(u'\xa0', u''))
output["data"][group] = text
return output
def image_parser(soup: BeautifulSoup):
image: Any
extension = ('png', 'jpg')
main = soup.find("main")
for ext in extension:
image = main.select(f'img[src$=".{ext}"]')
if image:
return image[0]['src']