Process metadata name
This commit is contained in:
parent
440fa2e4d4
commit
bd916decd5
|
@ -1,6 +1,6 @@
|
|||
# MasterScraper
|
||||
MasterScraper is a web scraper written in Python which is designed to pull data from a number of sources including [Wikipedia](https://en.wikipedia.org/wiki/Main_Page) and [macrotrends](https://www.macrotrends.net/) and then format that data in JSON to be used with the [MasterSet](https://git.rol.so/colttaine/masterset) statistical library.
|
||||
MasterScraper is a web scraper written in Python which is designed to pull statistical data from a number of sources including [Wikipedia](https://en.wikipedia.org/wiki/Main_Page) and [macrotrends](https://www.macrotrends.net/) and then format that data in JSON to be used with the [MasterSet](https://git.rol.so/colttaine/masterset) statistical library.
|
||||
|
||||
# License
|
||||
This project is licensed under a GNU GPL3+NIGGER license. For more information refer to [the included license file](https://git.rol.so/colttaine/masterscraper/src/branch/master/LICENSE.md).
|
||||
This project is licensed under a GNU GPL3 +NIGGER license. For more information refer to [the included license file](https://git.rol.so/colttaine/masterscraper/src/branch/master/LICENSE.md).
|
||||
|
||||
|
|
|
@ -83,6 +83,13 @@ class scrape:
|
|||
#--------[ Get Metadata ]--------#
|
||||
def get_meta(self):
|
||||
|
||||
# Process Name
|
||||
self.meta['name'] = self.meta['name'].lower()
|
||||
self.meta['name'] = re.sub('and\ dependencies ','',self.meta['name'])
|
||||
self.meta['name'] = re.sub('list\ of\ ','',self.meta['name'])
|
||||
self.meta['name'] = self.meta['name'].strip()
|
||||
self.meta['name'] = self.meta['name'].title()
|
||||
|
||||
|
||||
# Get Key Names
|
||||
self.data_info.append( [key for key in self.data[0]] )
|
||||
|
@ -286,6 +293,8 @@ class scrape:
|
|||
filename = self.data_info[1][key_data].replace('.','-')
|
||||
filepath = 'data/{0}/{1}'.format(self.meta['type'], self.meta['category'])
|
||||
if self.meta['subcategory'] != None: filepath = filepath + '/' + self.meta['subcategory']
|
||||
if len(self.data[0]) > 4:
|
||||
filepath = filepath + '/' + self.meta['name'].lower().replace(' ','-')
|
||||
|
||||
if not os.path.exists(filepath):
|
||||
os.makedirs(filepath)
|
||||
|
|
Loading…
Reference in New Issue