# startup.py (1.1 KB)
  1. #!/usr/bin/python
  2. print
  3. print "checking for nltk"
  4. try:
  5. import nltk
  6. except ImportError:
  7. print "you should install nltk before continuing"
  8. print "checking for numpy"
  9. try:
  10. import numpy
  11. except ImportError:
  12. print "you should install numpy before continuing"
  13. print "checking for scipy"
  14. try:
  15. import scipy
  16. except:
  17. print "you should install scipy before continuing"
  18. print "checking for sklearn"
  19. try:
  20. import sklearn
  21. except:
  22. print "you should install sklearn before continuing"
  23. print
  24. print "downloading the Enron dataset (this may take a while)"
  25. print "to check on progress, you can cd up one level, then execute <ls -lthr>"
  26. print "Enron dataset should be last item on the list, along with its current size"
  27. print "download will complete at about 423 MB"
  28. import urllib
  29. url = "https://www.cs.cmu.edu/~./enron/enron_mail_20150507.tar.gz"
  30. urllib.urlretrieve(url, filename="../enron_mail_20150507.tar.gz")
  31. print "download complete!"
  32. print
  33. print "unzipping Enron dataset (this may take a while)"
  34. import tarfile
  35. import os
  36. os.chdir("..")
  37. tfile = tarfile.open("enron_mail_20150507.tar.gz", "r:gz")
  38. tfile.extractall(".")
  39. print "you're ready to go!"