57 lines
2.0 KiB
Python
57 lines
2.0 KiB
Python
#!/usr/bin/env python3
|
|
|
|
|
|
import re
|
|
|
|
# Check If String Is Number
|
|
def isfloat(num):
    """Return True if ``float(num)`` succeeds, False otherwise.

    Args:
        num: Any object; typically a string taken from scraped data.

    Returns:
        bool: True when ``num`` can be converted with ``float()``.
    """
    try:
        float(num)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type (e.g. None, list).
        # ValueError: text that is not a number.
        # OverflowError: an int too large to be represented as a float.
        # Anything else (KeyboardInterrupt, SystemExit, real bugs) must
        # propagate instead of being reported as "not a number".
        return False
    return True
|
|
|
|
|
|
#--------[ Clean Scrape Data ]--------#
|
|
# Inline notes such as "[1]" or "(est.)". The patterns are greedy, so
# everything from the first opener to the last closer on a line is removed.
_BRACKET_NOTE_RE = re.compile(r'\[.*\]')
_PAREN_NOTE_RE = re.compile(r'\(.*\)')

# Lower-case fragments that mark a cell as "no value". 'unkown' is the
# misspelling this code historically matched; it is kept next to the correct
# spelling so inputs that were nulled before are still nulled.
_NON_ENTRY_MARKERS = ('not determined', 'negligible', 'unknown', 'unkown')


def _parse_number(text):
    """Return *text* as a float, or its reduced form if it is not a number.

    Only digits, '.' and '-' are kept before parsing, so units and other
    decoration around a value are dropped ("12 kg" -> 12.0). When the reduced
    text is still not a valid float ("1-2", "1.2.3") that reduced text is
    returned unchanged.
    """
    reduced = ''.join(ch for ch in text if ch.isdigit() or ch in '.-')
    try:
        return float(reduced)
    except ValueError:
        return reduced


def _is_non_entry(text):
    """Return True if *text* is a placeholder rather than a real value."""
    if text in ('-', ''):
        return True
    lowered = text.lower()
    return any(marker in lowered for marker in _NON_ENTRY_MARKERS)


def clean(self):
    """Normalise the string cells of ``self.data`` in place.

    Row 0 is skipped (the loop starts at index 1; presumably it is the header
    row -- confirm against the caller). For every string cell in the
    remaining rows:

    1. Text in square brackets or parentheses and all commas are removed,
       then surrounding whitespace is stripped.
    2. If the text contains a digit, everything except digits, '.' and '-'
       is dropped. If the result parses as a float it is multiplied by
       ``self.info['multiplyer'][column]`` and whole values become ``int``.
    3. Otherwise, '-', '' and text containing a non-entry marker
       ('not determined', 'negligible', 'unknown') become ``None``.

    Cells that are not strings are left untouched.

    NOTE(review): the indentation of the original function was not
    recoverable; the nesting here (multiplier applied only to values parsed
    from strings) is a reconstruction -- confirm against the original file.

    Returns:
        -1 if ``self.data`` has at most one row (nothing to clean),
        otherwise ``None``.
    """
    if len(self.data) <= 1:
        return -1  # Break if no data

    for x in range(1, len(self.data)):
        row = self.data[x]
        for y, cell in enumerate(row):
            if not isinstance(cell, str):
                continue

            # Remove any inline notes and thousands separators
            text = _BRACKET_NOTE_RE.sub('', cell)
            text = _PAREN_NOTE_RE.sub('', text)
            text = text.replace(',', '').strip()

            # Convert numerical strings to numbers
            if any(ch.isdigit() for ch in text):
                value = _parse_number(text)
            else:
                value = text

            if isinstance(value, float):
                # Apply the per-column multiplier ('multiplyer' is the key
                # spelling used by self.info and must stay as is).
                value = value * self.info['multiplyer'][y]
                # Convert whole floats to integers
                if value.is_integer():
                    value = int(value)
            elif _is_non_entry(value):
                # Convert non-entries to null
                value = None

            row[y] = value
|