Pull correct description data from macrotrends

This commit is contained in:
colttaine 2023-03-05 19:53:10 +11:00
parent 4b147cf8c2
commit fbf29f9759
4 changed files with 45 additions and 8 deletions

View File

@ -97,6 +97,10 @@ class scrape:
self.meta['name'] = self.meta['name'].strip()
self.meta['name'] = self.meta['name'].title()
self.meta['name'] = self.meta['name'].replace('Gdp', 'GDP')
self.meta['name'] = self.meta['name'].replace('Gni', 'GNI')
self.meta['name'] = self.meta['name'].replace('Gnp', 'GNP')
# Get Key Names
self.data_info.append( [key for key in self.data[0]] )
@ -152,12 +156,14 @@ class scrape:
key.lower().find('') >=0 ):
key_unit.append('')
elif( key.lower().find('km2') >=0 or
elif( key.lower().find('area') >=0 or
key.lower().find('land') >=0 or
key.lower().find('km2') >=0 or
key.lower().find('km²') >=0 or
key.lower().find('mi2') >=0 or
key.lower().find('mi²') >=0 ):
key_unit.append('km²')
elif( key.lower().find('country') >=0 or
key.lower().find('countries') >=0 or
key.lower().find('dependencies') >=0 ):
@ -178,13 +184,29 @@ class scrape:
elif( key.lower().find('death') >=0 or
key.lower().find('mortality') >=0 and
key.lower().find('rate') >=0 ):
key.lower().find('rate') >=0 and
key.lower().find('infant') <0 and
key.lower().find('maternal') <0 ):
key_unit.append('deaths/1k population')
elif( key.lower().find('mortality') >=0 and
key.lower().find('rate') >=0 and
key.lower().find('infant') >=0 ):
key_unit.append('deaths/100k live births')
elif( key.lower().find('mortality') >=0 and
key.lower().find('rate') >=0 and
key.lower().find('maternal') >=0 ):
key_unit.append('deaths/1k live births')
elif( key.lower().find('birth') >=0 and
key.lower().find('rate') >=0 ):
key_unit.append('births/1k population')
elif( key.lower().find('fertility') >=0 and
key.lower().find('rate') >=0 ):
key_unit.append('children/women')
elif( key.lower().find('marriage') >=0 and
key.lower().find('rate') >=0 ):
key_unit.append('marriages/1k population')
@ -201,6 +223,19 @@ class scrape:
key.lower().find('rate') >=0 ):
key_unit.append('murders/100k population')
elif( key.lower().find('military') >=0 and
key.lower().find('size') >=0 ):
key_unit.append('personel')
elif( key.lower().find('immigration') >=0 or
key.lower().find('migration') >=0 or
key.lower().find('refugee') >=0 and
key.lower().find('rate') <0 ):
key_unit.append('people')
elif( key.lower().find('emissions') >=0 ):
key_unit.append('tonnes')
else:
key_unit.append('unkown')
self.data_info.append( key_unit )
@ -349,6 +384,10 @@ class scrape:
search.find('murder') >=0 ):
self.meta['category'] = 'development'
#--------[ Military ]--------#
elif( search.find('military') >=0 ):
self.meta['category'] = 'military'
#--------[ Uncategorised ]--------#
else:
self.meta['category'] = 'uncategorised'

View File

@ -59,7 +59,8 @@ def scrape(url, meta, data):
meta['name'] = url_parts[-1].replace('-',' ').title()
meta['description'] = soup.find('h1').text
soup_desc = getpage( 'https://www.macrotrends.net/countries/ranking/' + url.split('/')[-1] )
meta['description'] = soup_desc.find('div',class_='navigation_tabs').find('span').text
meta['authors'].append( soup.find('span', string='Data Source: ').next_sibling.text )

View File

@ -1,9 +1,6 @@
#!/usr/bin/python3
import masterscraper as ms
scrapelist = ms.scrapelist('conf/wikipedia.txt')
for url in scrapelist:
scrape = ms.scrape(url)

View File

@ -5,7 +5,7 @@ import masterscraper as ms
scrape = ms.scrape('https://www.macrotrends.net/countries/TUR/turkey/population')
scrape = ms.scrape('https://www.macrotrends.net/countries/SGP/singapore/gdp-per-capita')
scrape.get_meta()
scrape.clean()
scrape.save()