Process metadata name

This commit is contained in:
colttaine 2023-03-05 15:09:19 +11:00
parent 440fa2e4d4
commit bd916decd5
2 changed files with 11 additions and 2 deletions

View File

@ -1,6 +1,6 @@
# MasterScraper
MasterScraper is a web scraper written in Python, designed to pull data from a number of sources, including [Wikipedia](https://en.wikipedia.org/wiki/Main_Page) and [Macrotrends](https://www.macrotrends.net/), and format that data as JSON for use with the [MasterSet](https://git.rol.so/colttaine/masterset) statistical library.
MasterScraper is a web scraper written in Python, designed to pull statistical data from a number of sources, including [Wikipedia](https://en.wikipedia.org/wiki/Main_Page) and [Macrotrends](https://www.macrotrends.net/), and format that data as JSON for use with the [MasterSet](https://git.rol.so/colttaine/masterset) statistical library.
# License
This project is licensed under a GNU GPL3+NIGGER license. For more information refer to [the included license file](https://git.rol.so/colttaine/masterscraper/src/branch/master/LICENSE.md).
This project is licensed under a GNU GPL3 +NIGGER license. For more information refer to [the included license file](https://git.rol.so/colttaine/masterscraper/src/branch/master/LICENSE.md).

View File

@ -83,6 +83,13 @@ class scrape:
#--------[ Get Metadata ]--------#
def get_meta(self):
# Process Name
self.meta['name'] = self.meta['name'].lower()
self.meta['name'] = re.sub('and\ dependencies ','',self.meta['name'])
self.meta['name'] = re.sub('list\ of\ ','',self.meta['name'])
self.meta['name'] = self.meta['name'].strip()
self.meta['name'] = self.meta['name'].title()
# Get Key Names
self.data_info.append( [key for key in self.data[0]] )
@ -286,6 +293,8 @@ class scrape:
filename = self.data_info[1][key_data].replace('.','-')
filepath = 'data/{0}/{1}'.format(self.meta['type'], self.meta['category'])
if self.meta['subcategory'] != None: filepath = filepath + '/' + self.meta['subcategory']
if len(self.data[0]) > 4:
filepath = filepath + '/' + self.meta['name'].lower().replace(' ','-')
if not os.path.exists(filepath):
os.makedirs(filepath)