outlier_cleaner.py 894 B

1234567891011121314151617181920212223242526272829
  1. #!/usr/bin/python
  2. import math
  3. def outlierCleaner(predictions, ages, net_worths):
  4. """
  5. Clean away the 10% of points that have the largest
  6. residual errors (difference between the prediction
  7. and the actual net worth).
  8. Return a list of tuples named cleaned_data where
  9. each tuple is of the form (age, net_worth, error).
  10. """
  11. #cleaned_data = []
  12. temp = []
  13. #print len(predictions)
  14. ### your code goes here
  15. for num in range(0,len(predictions)):
  16. error = abs(predictions[num] - net_worths[num])
  17. temp.append([ages[num],net_worths[num],error])
  18. temp = sorted(temp, key=lambda x: (x[2]))
  19. #print temp,len(temp)
  20. cuted_num = 0 - int(math.ceil(len(temp)/10))
  21. #print cuted_num
  22. cleaned_data = temp[:cuted_num]
  23. #print cleaned_data,len(cleaned_data)
  24. #print len(cleaned_data)
  25. return cleaned_data