# Analyse the errors by the distance from the nearest school,
# similar to Q1, but we need to create groups ourselves, and we
# do that using percentiles.

import numpy as np
from pythonql.helpers import print_table
from model_eval import dataset,model1

# Compute mean and std of prices as usual
mean = np.mean( [select d.price for d in dataset ])
std = np.std( [select d.price for d in dataset ])

# Compute percentiles and define a function that
# will map distances to percentiles
percs = [0,25,50,75,100]
distances = [ select x.school_dist for x in dataset ]
perc_values = [ select np.percentile(distances,x) for x in percs ]

# Define a function that maps a value to the range (pair of
# consecutive percentile boundaries) that contains it
def get_interval(perc_vals, value):
    """Return the pair of consecutive boundaries that brackets `value`.

    perc_vals -- ascending list of boundary values (at least two).
    value     -- the value to place into an interval.

    The result is a PythonQL tuple whose elements are labelled `f`
    (lower boundary) and `t` (upper boundary). A value at or above the
    last boundary is assigned to the last interval; a value below the
    first boundary is assigned to the first interval.

    Raises ValueError when fewer than two boundaries are supplied.
    """
    if len(perc_vals) < 2:
        raise ValueError("get_interval needs at least two boundary values")

    # Start at index 1 so that `i-1` can never wrap around to the last
    # element when `value` lies below the first boundary.
    for i in range(1, len(perc_vals)):
        if value < perc_vals[i]:
            break

    # If the loop ran to completion, `i` is the last index, which
    # selects the final interval.
    return (perc_vals[i-1] as f, perc_vals[i] as t)

# Compute the number of errors in each percentile range. A prediction
# counts as an error when it is off by more than one standard
# deviation of the prices. Ranges are listed by descending error rate.
res = [ select (range, segment_size, n_errors)
        for d in dataset,
            m in model1
        where d.record_id == m.record_id
        let err = abs(d.price - m.pred) > std
        group by get_interval(perc_values, d.school_dist) as range
        let n_errors = sum(err),
            segment_size = len(err)
        order by n_errors/segment_size desc ]

print_table(res)
Welcome to the PythonQL Web Demo
The Demo is organized into a number of scenarios that demonstrate the power and usability of PythonQL.
Each scenario illustrates a specific use case in data science that is addressed by PythonQL.