diff --git a/doc/index.rst b/doc/index.rst index f1dd8e3e064013c8ca870e719cabb28aa455425a..b75a0fee7ffd54e736f085ea157cf21f953b0678 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -92,11 +92,12 @@ User guide ====================== .. toctree:: - :maxdepth: 2 + :maxdepth: 3 install tutorial - module/index + supervised_learning + unsupervised_learning auto_examples/index contribute .. API diff --git a/doc/modules/em.rst b/doc/modules/em.rst index 814cc449269bb99a03da954d2f216f6f649f65ae..3985aa12f835d68505d6d8c96f09c1298ff3ff4f 100644 --- a/doc/modules/em.rst +++ b/doc/modules/em.rst @@ -1,22 +1,5 @@ -.. - restindex - page-title: Em - crumb: Em - link-title: Em - encoding: utf-8 - output-encoding: None - tags: python,em,Expectation Maximization,EM,online EM,recursive EM - file: basic_example1.py - file: basic_example2.py - file: basic_example3.py - file: example1.png - file: Bic_example.png - /restindex - -.. Last Change: Sun Jul 22 11:00 AM 2007 J - =================================================== - em, a python package for Gaussian mixture models +Gaussian mixture models =================================================== .. contents:: Tables of contents @@ -35,8 +18,14 @@ multi-dimensional array capabilities (ala matlab and the likes); scipy leverages numpy to build common scientific features for signal processing, linear algebra, statistics, etc... +.. warning:: + + The code in the `scikits.learn.em` submodule is not as mature as the + rest of the scikit and is prone to changing. + -basic usage + +Basic usage ============ Once you are inside a python interpreter, you can import the package using the @@ -56,7 +45,7 @@ plot it. The following example show how to create a 2 dimension Gaussian Model with 3 components, sample it and plot its confidence ellipsoids with matplotlib: -.. literalinclude:: ../../scikits/learn/em/examples/basic_example1.py +.. literalinclude:: ../examples/em/basic_example1.py which plots this figure: @@ -84,7 +73,7 @@ iterations of EM; once the EM has finished the computation, the GM instance of GMM contains the computed parameters. -.. literalinclude:: ../../scikits/learn/em/examples/basic_example2.py +.. literalinclude:: ../auto_examples/em/basic_example2.py GMM class do all the hard work for learning: it can compute the sufficient @@ -111,7 +100,7 @@ clusters, and prints which number of clusters is the most likely from the BIC: -.. literalinclude:: ../../scikits/learn/em/examples/basic_example3.py +.. literalinclude:: ../auto_examples/em/basic_example3.py which plots this figure: @@ -194,20 +183,21 @@ this case without insane amount of memory. A C version may be implemented, but this is not my top priority; most of the time, you should avoid full covariance models if possible. -Notes ------ - -I believe the current API simple and powerful enough, except -maybe for plotting (if you think otherwise, I would be happy to hear -your suggestions). Now, I am considering adding some more functionalities -to the toolbox: - +.. + Notes + ----- + + I believe the current API simple and powerful enough, except + maybe for plotting (if you think otherwise, I would be happy to hear + your suggestions). Now, I am considering adding some more functionalities + to the toolbox: + - add simple methods for regularization of covariance matrix (easy) - add bayes prior (using variational Bayes approximation) for overfitting and model selection problems (not trivial, but doable) - improve online EM - -Other things which are doable but which I don't intend to implement are: - + + Other things which are doable but which I don't intend to implement are: + - add other models (mixtures of multinomial: easy, simple HMM: easy, other ?) - add bayes prior using MCMC (hard, use PyMC for sampling ?) diff --git a/doc/modules/index.rst b/doc/modules/index.rst index 985ecc11945083c22e161e1c47cef994181a1ac7..f200e827c35a9d450d3ae8876a40798bed2cf323 100644 --- a/doc/modules/index.rst +++ b/doc/modules/index.rst @@ -4,9 +4,7 @@ Module reference .. toctree:: :maxdepth: 2 - ann - em - eature_selection + feature_selection glm manifold neighbors diff --git a/doc/sphinxext/gen_rst.py b/doc/sphinxext/gen_rst.py index bd87994353b9b7081cb18b1e96e8615c41c0af3f..779470511373847852e10e2ba7928b3c01a244d8 100644 --- a/doc/sphinxext/gen_rst.py +++ b/doc/sphinxext/gen_rst.py @@ -8,6 +8,7 @@ Files that generate images should start with 'plot' """ import os +import shutil fileList = [] @@ -20,34 +21,35 @@ import token, tokenize rst_template = """ -.. %(short_fname)s_example: +.. _example_%(short_fname)s: %(docstring)s -**Source code:** :download:`%(fname)s <%(short_fname)s>` +**Source code:** :download:`%(fname)s <%(fname)s>` -.. literalinclude:: %(short_fname)s +.. literalinclude:: %(fname)s :lines: %(end_row)s- """ plot_rst_template = """ -.. _example_%(fname)s: +.. _example_%(short_fname)s: %(docstring)s .. image:: images/%(image_name)s :align: center -**Source code:** :download:`%(fname)s <%(short_fname)s>` +**Source code:** :download:`%(fname)s <%(fname)s>` -.. literalinclude:: %(short_fname)s +.. literalinclude:: %(fname)s :lines: %(end_row)s- """ def extract_docstring(filename): - # Extract a module-level docstring, if any + """ Extract a module-level docstring, if any + """ lines = file(filename).readlines() start_row = 0 if lines[0].startswith('#!'): @@ -74,49 +76,18 @@ def extract_docstring(filename): def generate_example_rst(app): - rootdir = os.path.join(app.builder.srcdir, 'auto_examples') - exampledir = os.path.abspath(app.builder.srcdir + '/../' + 'examples') - if not os.path.exists(exampledir): - os.makedirs(exampledir) - - datad = [] - - for root, dirs, files in os.walk(exampledir): - for fname in files: - image_name = fname[:-2] + 'png' - global rst_template, plot_rst_template - this_template = rst_template - short_fname = '../../examples/' + fname - if not fname.endswith('py'): - continue - example_file = os.path.join(exampledir, fname) - if fname.startswith('plot'): - # generate the plot as png image if file name - # starts with plot and if it is more recent than an - # existing image. - if not os.path.exists( - os.path.join(rootdir, 'images')): - os.makedirs(os.path.join(rootdir, 'images')) - image_file = os.path.join(rootdir, 'images', image_name) - if (not os.path.exists(image_file) or - os.stat(image_file).st_mtime <= - os.stat(example_file).st_mtime): - print 'plotting %s' % fname - import matplotlib.pyplot as plt - plt.close('all') - mplshell.magic_run(example_file) - plt.savefig(image_file) - this_template = plot_rst_template - - docstring, short_desc, end_row = extract_docstring(example_file) - - f = open(os.path.join(rootdir, fname[:-2] + 'rst'),'w') - f.write( this_template % locals()) - f.flush() - datad.append(fname) + """ Generate the list of examples, as well as the contents of + examples. + """ + root_dir = os.path.join(app.builder.srcdir, 'auto_examples') + example_dir = os.path.abspath(app.builder.srcdir + '/../' + 'examples') + if not os.path.exists(example_dir): + os.makedirs(example_dir) + if not os.path.exists(root_dir): + os.makedirs(root_dir) # we create an index.rst with all examples - fhindex = file(os.path.join(rootdir, 'index.rst'), 'w') + fhindex = file(os.path.join(root_dir, 'index.rst'), 'w') fhindex.write("""\ .. _examples-index: @@ -126,14 +97,76 @@ Examples :Release: |version| :Date: |today| -.. toctree:: - """) - - for fname in datad: - fhindex.write(' %s\n' % (fname[:-3])) + # Here we don't use an os.walk, but we recurse only twice: flat is + # better than nested. + generate_dir_rst('.', fhindex, example_dir, root_dir) + for dir in sorted(os.listdir(example_dir)): + if dir == '.svn': + continue + if os.path.isdir(os.path.join(example_dir, dir)): + generate_dir_rst(dir, fhindex, example_dir, root_dir) fhindex.flush() + +def generate_dir_rst(dir, fhindex, example_dir, root_dir): + """ Generate the rst file for an example directory. + """ + target_dir = os.path.join(root_dir, dir) + src_dir = os.path.join(example_dir, dir) + if not os.path.exists(os.path.join(src_dir, 'README.txt')): + raise IOError('Example directory %s does not have a README.txt file' + % src_dir) + fhindex.write(""" + +%s + +.. toctree:: + +""" % file(os.path.join(src_dir, 'README.txt')).read()) + if not os.path.exists(target_dir): + os.makedirs(target_dir) + for fname in sorted(os.listdir(src_dir)): + if fname.endswith('py'): + generate_file_rst(fname, target_dir, src_dir) + fhindex.write(' %s\n' % (os.path.join(dir, fname[:-3]))) + + +def generate_file_rst(fname, target_dir, src_dir): + """ Generate the rst file for a given example. + """ + image_name = fname[:-2] + 'png' + global rst_template, plot_rst_template + this_template = rst_template + short_fname = os.path.split(src_dir)[-1] + fname + src_file = os.path.join(src_dir, fname) + example_file = os.path.join(target_dir, fname) + shutil.copyfile(src_file, example_file) + if fname.startswith('plot'): + # generate the plot as png image if file name + # starts with plot and if it is more recent than an + # existing image. + if not os.path.exists( + os.path.join(target_dir, 'images')): + os.makedirs(os.path.join(target_dir, 'images')) + image_file = os.path.join(target_dir, 'images', image_name) + if (not os.path.exists(image_file) or + os.stat(image_file).st_mtime <= + os.stat(src_file).st_mtime): + print 'plotting %s' % fname + import matplotlib.pyplot as plt + plt.close('all') + mplshell.magic_run(example_file) + plt.savefig(image_file) + this_template = plot_rst_template + + docstring, short_desc, end_row = extract_docstring(example_file) + + f = open(os.path.join(target_dir, fname[:-2] + 'rst'),'w') + f.write( this_template % locals()) + f.flush() + + def setup(app): app.connect('builder-inited', generate_example_rst) diff --git a/doc/supervised_learning.rst b/doc/supervised_learning.rst new file mode 100644 index 0000000000000000000000000000000000000000..1ccf86ee7e770aa9a50630a60e8b34af0da32883 --- /dev/null +++ b/doc/supervised_learning.rst @@ -0,0 +1,14 @@ + +Supervised learning +----------------------- + +.. toctree:: + + modules/svm + modules/glm + modules/neighbors + modules/ann + modules/feature_selection + + + diff --git a/doc/unsupervised_learning.rst b/doc/unsupervised_learning.rst new file mode 100644 index 0000000000000000000000000000000000000000..de0b0e5537c55b2a56968ddd0e595d1a7af9b035 --- /dev/null +++ b/doc/unsupervised_learning.rst @@ -0,0 +1,10 @@ + +Unsupervised learning +----------------------- + +.. toctree:: + + modules/em + + + diff --git a/examples/README.txt b/examples/README.txt new file mode 100644 index 0000000000000000000000000000000000000000..8a6c6bd5a28fb6cf68aecbab842d66ad248235d3 --- /dev/null +++ b/examples/README.txt @@ -0,0 +1,6 @@ + +General examples +------------------- + +General-purpose and introductory examples for the scikit. +