From bd916decd5da1205ebacf3e92a8d6d1e35d0da7f Mon Sep 17 00:00:00 2001 From: colttaine Date: Sun, 5 Mar 2023 15:09:19 +1100 Subject: [PATCH] Process metadata name --- README.md | 4 ++-- masterscraper/__init__.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 61456e5..3c1cbe4 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # MasterScraper -MasterScraper is a webscraper writen in Python which is designed to pull data from a number of sources including [wikipedia](https://en.wikipedia.org/wiki/Main_Page) and [macrotrends](https://www.macrotrends.net/) and then format that data in JSON to be used with the [MasterSet](https://git.rol.so/colttaine/masterset) statistical library. +MasterScraper is a webscraper writen in Python which is designed to pull statistical data from a number of sources including [wikipedia](https://en.wikipedia.org/wiki/Main_Page) and [macrotrends](https://www.macrotrends.net/) and then format that data in JSON to be used with the [MasterSet](https://git.rol.so/colttaine/masterset) statistical library. # License -This project is licensed under a GNU GPL3+NIGGER license. For more information refer to [the included license file](https://git.rol.so/colttaine/masterscraper/src/branch/master/LICENSE.md). +This project is licensed under a GNU GPL3 +NIGGER license. For more information refer to [the included license file](https://git.rol.so/colttaine/masterscraper/src/branch/master/LICENSE.md). 
diff --git a/masterscraper/__init__.py b/masterscraper/__init__.py index 32744a8..828062a 100644 --- a/masterscraper/__init__.py +++ b/masterscraper/__init__.py @@ -83,6 +83,13 @@ class scrape: #--------[ Get Metadata ]--------# def get_meta(self): + # Process Name + self.meta['name'] = self.meta['name'].lower() + self.meta['name'] = re.sub(r'and\ dependencies ','',self.meta['name']) + self.meta['name'] = re.sub(r'list\ of\ ','',self.meta['name']) + self.meta['name'] = self.meta['name'].strip() + self.meta['name'] = self.meta['name'].title() + # Get Key Names self.data_info.append( [key for key in self.data[0]] ) @@ -286,6 +293,8 @@ class scrape: filename = self.data_info[1][key_data].replace('.','-') filepath = 'data/{0}/{1}'.format(self.meta['type'], self.meta['category']) if self.meta['subcategory'] != None: filepath = filepath + '/' + self.meta['subcategory'] + if len(self.data[0]) > 4: + filepath = filepath + '/' + self.meta['name'].lower().replace(' ','-') if not os.path.exists(filepath): os.makedirs(filepath)