masterscraper/masterscraper/__init__.py

72 lines
2.8 KiB
Python

#!/usr/bin/python3
class scrape:
    """Scrape one URL into a data set with accompanying metadata.

    The helper callables are imported inside the class body, so each of them
    becomes a class attribute and is invoked through the instance
    (``self.meta_name()``, ``self.wikipedia.scrape(...)`` and so on).
    """

    #--------[ Import Module Parts ]--------#
    from . import wikipedia, macrotrends
    from .core.meta_name import meta_name
    from .core.meta_search import meta_search
    from .core.meta_keys import meta_keys
    from .core.meta_year import meta_year
    from .core.meta_units import meta_units
    from .core.meta_multiplyer import meta_multiplyer
    from .core.meta_scope import meta_scope
    from .core.meta_category import meta_category
    from .core.meta_type import meta_type
    from .core.meta_tags import meta_tags
    from .core.get_list import get_list
    from .core.show import show
    from .core.clean import clean
    from .core.save import save

    #--------[ Scrape Constructor Object ]--------#
    def __init__(self, url):
        """Create empty containers and run the scraper(s) matching ``url``.

        The URL is printed in square brackets first.  The instance is then
        handed to ``wikipedia.scrape`` and/or ``macrotrends.scrape`` depending
        on which domain substrings occur in ``url``; a URL containing neither
        leaves ``data`` empty.
        """
        banner = '\n[{0}]'.format(url)
        print(banner)

        self.meta = {
            "name": None,         # Variable/Set name
            "description": None,  # Description of variable/set
            "units": None,        # Units of variable
            "year": None,         # Year(s) of variable
            "notes": [],          # Any notes related to the variable/set
            "id": None,           # Official ID if applicable
            "type": None,         # Type of variable/set
            "scope": None,        # Scope of the variable/set
            "category": None,     # Main category of the variable/set
            "subcategory": None,  # Subcategory of the variable/set
            "tags": [],           # Search tags applicable to the variable/set
            "authors": [],        # Person or organisation responsible for the data
            "sources": [],        # URL sources for the data
        }
        self.data = []  # The actual data set
        self.info = {}  # Temporary metadata extracted from the data set

        # Two independent checks (not if/elif): a URL that contains both
        # domain strings is passed to both scrapers, in this order.
        if 'wikipedia.org' in url:
            self.wikipedia.scrape(self, url)
        if 'macrotrends.net' in url:
            self.macrotrends.scrape(self, url)

    #--------[ Scrape Deconstructor ]--------#
    def __del__(self):
        """Do nothing on finalisation; kept as an explicit no-op hook."""

    #--------[ Get Metadata ]--------#
    def get_meta(self):
        """Run the metadata extraction steps over the scraped data, in order.

        Returns -1 without running any step when ``data`` holds one entry or
        fewer.
        """
        if len(self.data) <= 1:
            return -1  # Break if no data

        # Step names are resolved on the instance one at a time, immediately
        # before each call, so the order below is the execution order.
        pipeline = (
            'meta_name',        # Clean set name
            'meta_search',      # Create search-space
            'meta_keys',        # Extract variable key-name
            'meta_year',        # Extract variable year
            'meta_units',       # Extract variable unit
            'meta_multiplyer',  # Extract variable multiplyer
            'meta_scope',       # Extract variable scope
            'meta_category',    # Extract set category
            'meta_type',        # Extract set type
            'meta_tags',        # Extract set tag
        )
        for step in pipeline:
            getattr(self, step)()