#!/usr/bin/env python3

import re

# Inline notes. Both patterns are greedy on purpose (unchanged from the
# original expressions): everything from the first opening bracket to the
# last closing bracket in the cell is dropped.
_BRACKET_NOTE = re.compile(r'\[.*\]')
_PAREN_NOTE = re.compile(r'\(.*\)')

# Substrings (compared against the lower-cased cell) that mark a non-entry.
# The misspelled 'unkown' is kept next to 'unknown' so that any data that
# matched the original spelling still matches.
_MISSING_MARKERS = ('not determined', 'negligible', 'unknown', 'unkown')


# Check If String Is Number
def isfloat(num):
    """Return True if ``float(num)`` succeeds, otherwise False.

    Only conversion failures are treated as "not a float"; unrelated
    exceptions such as KeyboardInterrupt are no longer swallowed.
    """
    try:
        float(num)
    except (TypeError, ValueError, OverflowError):
        return False
    return True


#--------[ Clean Scrape Data ]--------#
def clean(self):
    """Normalise the string cells of ``self.data`` in place.

    Row 0 is never touched (the loop starts at index 1; presumably it is a
    header row -- confirm against the caller). For every other row, each
    cell that is a ``str`` is processed as follows; non-string cells are
    left exactly as they are:

    1. Text in ``[...]`` and ``(...)`` is removed, commas are removed and
       surrounding whitespace is stripped.
    2. If the remaining text contains at least one digit, it is reduced to
       its digits, ``.`` and ``-``. When that parses as a float it is
       multiplied by ``self.info['multiplyer'][y]`` (``y`` being the column
       index) and whole results are stored as ``int``. When it does not
       parse (e.g. ``'10-20'``) the reduced string is stored.
    3. Otherwise, text containing one of the missing-value markers, or equal
       to ``''`` or ``'-'``, is replaced by ``None``.

    Returns:
        ``-1`` if ``self.data`` has one row or fewer, otherwise ``None``.
    """
    if len(self.data) <= 1:
        return -1  # Break if no data

    for x in range(1, len(self.data)):
        row = self.data[x]
        for y, cell in enumerate(row):
            # Only strings are cleaned; anything else stays untouched.
            if not isinstance(cell, str):
                continue

            # Remove any inline notes and thousands separators from data
            cell = _BRACKET_NOTE.sub('', cell)
            cell = _PAREN_NOTE.sub('', cell)
            cell = cell.replace(',', '').strip()

            # Convert numerical strings to numbers
            if any(ch.isdigit() for ch in cell):
                cell = ''.join(
                    ch for ch in cell if ch.isdigit() or ch in ('.', '-')
                )
                if isfloat(cell):
                    # Apply Variable Multiplyer. The key spelling is part of
                    # the external ``self.info`` layout, and it is only read
                    # for cells that really are numeric.
                    value = float(cell) * self.info['multiplyer'][y]
                    # Convert Whole Floats To Integers
                    if value.is_integer():
                        value = int(value)
                    row[y] = value
                    continue

            # Convert non-entries to null
            lowered = cell.lower()
            if cell in ('', '-') or any(
                marker in lowered for marker in _MISSING_MARKERS
            ):
                cell = None
            row[y] = cell