masterscraper/masterscraper/core/save.py

118 lines
4.1 KiB
Python

#!/usr/bin/env python3
import json
import os
#--------[ Save Scrape Data ]--------#
def save(self):
    """Write each non-key column of the scraped table to its own JSON file.

    ``self.data[0]`` is the header row and the remaining rows are values.
    The key column is the last header cell equal to ``'country.name'`` or
    ``'year'`` (column 0 when none matches). Every other column is paired
    with the key column and written to
    ``data/<type>[/<scope>]/<category>[/<subcategory>][/<name>]/<key>.json``,
    where ``<scope>`` is only used for ``'historical'`` data and ``<name>``
    only when the table has more than four columns.

    Side effects: ``self.meta['units']`` and ``self.meta['year']`` are
    overwritten for each column, ``self.meta['scope']`` is filled in from
    ``self.info['scope']`` when it is ``None``, missing directories are
    created, and one summary line per written file is printed.

    Returns:
        ``-1`` when there is no data row; otherwise ``None``.
    """
    if len(self.data) <= 1:
        return -1  # Break if no data

    header = self.data[0]
    keys = self.info['keys']

    #--------[ Locate Key Column ]--------#
    # Plain equality on purpose: the former `== 'country.name' >= 0` was a
    # chained comparison that evaluated `str >= int` and raised TypeError.
    key_main = 0
    for index, label in enumerate(header[:len(keys)]):
        if label in ('country.name', 'year'):
            key_main = index  # no break: the last matching column wins

    for key_data in range(len(header)):
        if key_data == key_main:
            continue
        column_key = keys[key_data]

        #--------[ Generate Filename ]--------#
        filepath = 'data/{0}'.format(self.meta['type'])
        if self.meta['type'] == 'historical':
            # NOTE(review): scope is only defaulted further down, so a
            # historical table whose scope is still None fails here.
            filepath += '/' + self.meta['scope'].lower().replace(' ', '-')
        filepath += '/{0}'.format(self.meta['category'])
        if self.meta['subcategory'] is not None:
            filepath += '/' + self.meta['subcategory']
        if len(header) > 4:
            filepath += '/' + self.meta['name'].lower().replace(' ', '-')
        fullpath = filepath + '/' + column_key.replace('.', '-') + '.json'

        #--------[ Check File Directory ]--------#
        os.makedirs(filepath, exist_ok=True)

        #--------[ Update Metadata ]--------#
        self.meta['units'] = self.info['units'][key_data]
        self.meta['year'] = self.info['year'][key_data]
        if self.meta['scope'] is None:
            self.meta['scope'] = self.info['scope'][key_data]

        #--------[ Render Metadata ]--------#
        meta_entries = [
            _save_meta_entry(name, value) for name, value in self.meta.items()
        ]

        #--------[ Render Actual Data ]--------#
        if self.meta['type'] == 'historical':
            value_label = self.meta['id'] + '.' + column_key
        else:
            value_label = column_key
        rows = [
            ' [{0},{1}]'.format(
                _save_json_value(row[key_main]),
                _save_json_value(row[key_data]),
            )
            for row in self.data[1:]
        ]

        # Separators are placed by join(), i.e. by position; comparing each
        # element with the last one dropped commas when values repeated.
        parts = ['{\n', ' "metadata" : {\n']
        if meta_entries:
            parts.append(',\n'.join(meta_entries) + '\n')
        parts.append(' },\n')
        parts.append(' "data" : [\n')
        parts.append(' [{0},{1}],\n'.format(
            _save_json_string(keys[key_main]),
            _save_json_string(value_label),
        ))
        parts.append(',\n'.join(rows) + '\n')
        parts.append(' ]\n')
        parts.append('}\n')

        #--------[ Write File ]--------#
        # The document is built first and written in one go, so an error
        # while rendering never leaves a truncated file or an open handle.
        with open(fullpath, 'w', encoding='utf-8') as f:
            f.write(''.join(parts))

        #--------[ Final Result ]--------#
        print(' [{0} data points] -> {1}'.format(len(self.data) - 1, fullpath))


#--------[ JSON Helpers ]--------#
def _save_json_string(value):
    """Return ``value`` formatted as text and encoded as a JSON string."""
    return json.dumps('{0}'.format(value), ensure_ascii=False)


def _save_json_value(value):
    """Return the JSON text for a single value (str, number, bool, None...).

    Values the ``json`` module cannot encode fall back to their plain
    formatted text, which is what was written for them before.
    """
    try:
        return json.dumps(value, ensure_ascii=False)
    except (TypeError, ValueError):
        return '{0}'.format(value)


def _save_meta_entry(name, value):
    """Return one ``"name" : value`` metadata line, without a separator.

    Lists are written as arrays of strings: ``tags`` on a single line, any
    other list with one item per line. Everything else is written as a
    plain JSON value.
    """
    label = ' {0} : '.format(_save_json_string(name))
    if not isinstance(value, list):
        return label + _save_json_value(value)
    items = [_save_json_string(item) for item in value]
    if not items:
        return label + '[]'
    if name == 'tags':
        return label + '[' + ','.join(items) + ']'
    return label + '[\n' + ',\n'.join(' ' + item for item in items) + '\n ]'