Author: Diane Trout <diane@ghic.org>
Last-Update: 2017-09-26 22:43:33 -0700
Description: Use cached datasets to enable building documentation
 without accessing the internet.  Each get_rdataset() call changed
 below gains cache=True.  The datasets are described in
 debian/README.source.

--- a/docs/source/contingency_tables.rst
+++ b/docs/source/contingency_tables.rst
@@ -49,7 +49,7 @@
     import pandas as pd
     import statsmodels.api as sm
 
-    df = sm.datasets.get_rdataset("Arthritis", "vcd").data
+    df = sm.datasets.get_rdataset("Arthritis", "vcd", cache=True).data
 
     tab = pd.crosstab(df['Treatment'], df['Improved'])
     tab = tab.loc[:, ["None", "Some", "Marked"]]
--- a/docs/source/duration.rst
+++ b/docs/source/duration.rst
@@ -42,7 +42,7 @@
 
    import statsmodels.api as sm
 
-   data = sm.datasets.get_rdataset("flchain", "survival").data
+   data = sm.datasets.get_rdataset("flchain", "survival", cache=True).data
    df = data.loc[data.sex == "F", :]
    sf = sm.SurvfuncRight(df["futime"], df["death"])
 
@@ -155,7 +155,7 @@
    import statsmodels.api as sm
    import statsmodels.formula.api as smf
 
-   data = sm.datasets.get_rdataset("flchain", "survival").data
+   data = sm.datasets.get_rdataset("flchain", "survival", cache=True).data
    del data["chapter"]
    data = data.dropna()
    data["lam"] = data["lambda"]
--- a/docs/source/example_formulas.rst
+++ b/docs/source/example_formulas.rst
@@ -45,7 +45,7 @@
 
 .. ipython:: python
 
-    df = sm.datasets.get_rdataset("Guerry", "HistData").data
+    df = sm.datasets.get_rdataset("Guerry", "HistData", cache=True).data
     df = df[['Lottery', 'Literacy', 'Wealth', 'Region']].dropna()
     df.head()
 
--- a/docs/source/gee.rst
+++ b/docs/source/gee.rst
@@ -24,7 +24,7 @@
     import statsmodels.api as sm
     import statsmodels.formula.api as smf
 
-    data = sm.datasets.get_rdataset('epil', package='MASS').data
+    data = sm.datasets.get_rdataset('epil', package='MASS', cache=True).data
 
     fam = sm.families.Poisson()
     ind = sm.cov_struct.Exchangeable()
--- a/docs/source/gettingstarted.rst
+++ b/docs/source/gettingstarted.rst
@@ -43,7 +43,7 @@
 
 .. ipython:: python
 
-    df = sm.datasets.get_rdataset("Guerry", "HistData").data
+    df = sm.datasets.get_rdataset("Guerry", "HistData", cache=True).data
 
 The `Input/Output doc page <iolib.html>`_ shows how to import from various
 other formats.
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -25,7 +25,7 @@
     import statsmodels.formula.api as smf
 
     # Load data
-    dat = sm.datasets.get_rdataset("Guerry", "HistData").data
+    dat = sm.datasets.get_rdataset("Guerry", "HistData", cache=True).data
 
     # Fit regression model (using the natural log of one of the regressors)
     results = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=dat).fit()
--- a/docs/source/mixed_linear.rst
+++ b/docs/source/mixed_linear.rst
@@ -35,7 +35,7 @@
   import statsmodels.api as sm
   import statsmodels.formula.api as smf
 
-  data = sm.datasets.get_rdataset("dietox", "geepack").data
+  data = sm.datasets.get_rdataset("dietox", "geepack", cache=True).data
 
   md = smf.mixedlm("Weight ~ Time", data, groups=data["Pig"])
   mdf = md.fit()
--- a/docs/source/release/version0.6.rst
+++ b/docs/source/release/version0.6.rst
@@ -43,7 +43,7 @@
    import statsmodels.api as sm
    import statsmodels.formula.api as smf
 
-   data = sm.datasets.get_rdataset("epil", "MASS").data
+   data = sm.datasets.get_rdataset("epil", "MASS", cache=True).data
 
    md = smf.gee("y ~ age + trt + base", "subject", data,
                 cov_struct=sm.cov_struct.Independence(), 
--- a/docs/source/datasets/index.rst
+++ b/docs/source/datasets/index.rst
@@ -30,7 +30,7 @@
 .. ipython:: python
 
    import statsmodels.api as sm
-   duncan_prestige = sm.datasets.get_rdataset("Duncan", "car")
+   duncan_prestige = sm.datasets.get_rdataset("Duncan", "car", cache=True)
    print(duncan_prestige.__doc__)
    duncan_prestige.data.head(5)
 
