35 lines
923 B
Python
35 lines
923 B
Python
|
from bs4 import BeautifulSoup
|
||
|
from typing import Any
|
||
|
|
||
|
def table_parser(soup: BeautifulSoup, output):
    """Fill *output* with the page date and the per-group replacement text.

    The date is the text of the second ``<span style="color:black">`` found
    inside ``<main>``, with non-breaking spaces removed.

    Every ``<tr>`` in the document then contributes one entry to
    ``output["data"]``:

    * key -- text of the first black span in the row, with spaces and
      non-breaking spaces removed;
    * value -- text of the first black span inside the row's
      ``<td valign="top">`` cell, with non-breaking spaces removed.

    Rows that lack such a cell or span are skipped.

    Args:
        soup: Parsed document to read from.
        output: Mapping that is mutated in place. ``"date"`` is overwritten
            and the ``"data"`` mapping is created when it is missing.

    Returns:
        The same *output* object that was passed in.

    Raises:
        AttributeError: If the document has no ``<main>`` element.
        IndexError: If ``<main>`` holds fewer than two black spans.
    """
    # Date parser
    black_spans = soup.find("main").find_all("span", style="color:black")
    output["date"] = black_spans[1].text.replace("\xa0", "")

    # Replaces parser
    entries = output.setdefault("data", {})
    for row in soup.find_all("tr"):
        cell = row.find("td", valign="top")
        if cell is None:
            # No top-aligned cell in this row, so there is nothing to record;
            # skipping avoids an AttributeError on None aborting the parse.
            continue

        text_span = cell.find("span", style="color:black")
        if text_span is None:
            continue

        # text_span is itself a black span inside this row, so this lookup
        # cannot come back empty once we get here.
        group_span = row.find("span", style="color:black")

        group = group_span.text.replace(" ", "").replace("\xa0", "")
        entries[group] = text_span.text.replace("\xa0", "")

    return output
|
||
|
|
||
|
|
||
|
def image_parser(soup: BeautifulSoup):
    """Return the ``src`` of the first PNG or JPG image inside ``<main>``.

    Extensions are tried in order, so a ``.png`` image is preferred over a
    ``.jpg`` one even when the JPG appears earlier in the document. Matching
    is done on the end of the ``src`` attribute value.

    Args:
        soup: Parsed document to search.

    Returns:
        The ``src`` attribute of the first matching ``<img>``, or ``None``
        when the document has no ``<main>`` element or no matching image.
    """
    main = soup.find("main")
    if main is None:
        # Treat a missing <main> the same as "no image found" rather than
        # failing with AttributeError on None.
        return None

    for ext in ("png", "jpg"):
        matches = main.select(f'img[src$=".{ext}"]')
        if matches:
            return matches[0]["src"]

    return None
|