Pull correct description data from macrotrends
This commit is contained in:
parent
4b147cf8c2
commit
fbf29f9759
|
@ -97,6 +97,10 @@ class scrape:
|
|||
self.meta['name'] = self.meta['name'].strip()
|
||||
self.meta['name'] = self.meta['name'].title()
|
||||
|
||||
self.meta['name'] = self.meta['name'].replace('Gdp', 'GDP')
|
||||
self.meta['name'] = self.meta['name'].replace('Gni', 'GNI')
|
||||
self.meta['name'] = self.meta['name'].replace('Gnp', 'GNP')
|
||||
|
||||
|
||||
# Get Key Names
|
||||
self.data_info.append( [key for key in self.data[0]] )
|
||||
|
@ -152,12 +156,14 @@ class scrape:
|
|||
key.lower().find('€') >=0 ):
|
||||
key_unit.append('€')
|
||||
|
||||
elif( key.lower().find('km2') >=0 or
|
||||
elif( key.lower().find('area') >=0 or
|
||||
key.lower().find('land') >=0 or
|
||||
key.lower().find('km2') >=0 or
|
||||
key.lower().find('km²') >=0 or
|
||||
key.lower().find('mi2') >=0 or
|
||||
key.lower().find('mi²') >=0 ):
|
||||
|
||||
key_unit.append('km²')
|
||||
|
||||
elif( key.lower().find('country') >=0 or
|
||||
key.lower().find('countries') >=0 or
|
||||
key.lower().find('dependencies') >=0 ):
|
||||
|
@ -178,13 +184,29 @@ class scrape:
|
|||
|
||||
elif( key.lower().find('death') >=0 or
|
||||
key.lower().find('mortality') >=0 and
|
||||
key.lower().find('rate') >=0 ):
|
||||
key.lower().find('rate') >=0 and
|
||||
key.lower().find('infant') <0 and
|
||||
key.lower().find('maternal') <0 ):
|
||||
key_unit.append('deaths/1k population')
|
||||
|
||||
elif( key.lower().find('mortality') >=0 and
|
||||
key.lower().find('rate') >=0 and
|
||||
key.lower().find('infant') >=0 ):
|
||||
key_unit.append('deaths/100k live births')
|
||||
|
||||
elif( key.lower().find('mortality') >=0 and
|
||||
key.lower().find('rate') >=0 and
|
||||
key.lower().find('maternal') >=0 ):
|
||||
key_unit.append('deaths/1k live births')
|
||||
|
||||
elif( key.lower().find('birth') >=0 and
|
||||
key.lower().find('rate') >=0 ):
|
||||
key_unit.append('births/1k population')
|
||||
|
||||
elif( key.lower().find('fertility') >=0 and
|
||||
key.lower().find('rate') >=0 ):
|
||||
key_unit.append('children/women')
|
||||
|
||||
elif( key.lower().find('marriage') >=0 and
|
||||
key.lower().find('rate') >=0 ):
|
||||
key_unit.append('marriages/1k population')
|
||||
|
@ -201,6 +223,19 @@ class scrape:
|
|||
key.lower().find('rate') >=0 ):
|
||||
key_unit.append('murders/100k population')
|
||||
|
||||
elif( key.lower().find('military') >=0 and
|
||||
key.lower().find('size') >=0 ):
|
||||
key_unit.append('personel')
|
||||
|
||||
elif( key.lower().find('immigration') >=0 or
|
||||
key.lower().find('migration') >=0 or
|
||||
key.lower().find('refugee') >=0 and
|
||||
key.lower().find('rate') <0 ):
|
||||
key_unit.append('people')
|
||||
|
||||
elif( key.lower().find('emissions') >=0 ):
|
||||
key_unit.append('tonnes')
|
||||
|
||||
else:
|
||||
key_unit.append('unkown')
|
||||
self.data_info.append( key_unit )
|
||||
|
@ -349,6 +384,10 @@ class scrape:
|
|||
search.find('murder') >=0 ):
|
||||
self.meta['category'] = 'development'
|
||||
|
||||
#--------[ Military ]--------#
|
||||
elif( search.find('military') >=0 ):
|
||||
self.meta['category'] = 'military'
|
||||
|
||||
#--------[ Uncategorised ]--------#
|
||||
else:
|
||||
self.meta['category'] = 'uncategorised'
|
||||
|
|
|
@ -59,7 +59,8 @@ def scrape(url, meta, data):
|
|||
|
||||
meta['name'] = url_parts[-1].replace('-',' ').title()
|
||||
|
||||
meta['description'] = soup.find('h1').text
|
||||
soup_desc = getpage( 'https://www.macrotrends.net/countries/ranking/' + url.split('/')[-1] )
|
||||
meta['description'] = soup_desc.find('div',class_='navigation_tabs').find('span').text
|
||||
|
||||
meta['authors'].append( soup.find('span', string='Data Source: ').next_sibling.text )
|
||||
|
||||
|
|
|
@ -1,9 +1,6 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
|
||||
import masterscraper as ms
|
||||
|
||||
|
||||
scrapelist = ms.scrapelist('conf/wikipedia.txt')
|
||||
for url in scrapelist:
|
||||
scrape = ms.scrape(url)
|
||||
|
|
|
@ -5,7 +5,7 @@ import masterscraper as ms
|
|||
|
||||
|
||||
|
||||
scrape = ms.scrape('https://www.macrotrends.net/countries/TUR/turkey/population')
|
||||
scrape = ms.scrape('https://www.macrotrends.net/countries/SGP/singapore/gdp-per-capita')
|
||||
scrape.get_meta()
|
||||
scrape.clean()
|
||||
scrape.save()
|
||||
|
|
Loading…
Reference in New Issue