/usr/share/pyshared/statsmodels/tools/datautils.py is in python-statsmodels 0.4.2-1.2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | import os
import time
import numpy as np
from numpy import genfromtxt, array
class Dataset(dict):
def __init__(self, **kw):
dict.__init__(self,kw)
self.__dict__ = self
# Some datasets have string variables. If you want a raw_data attribute you
# must create this in the dataset's load function.
try: # some datasets have string variables
self.raw_data = self.data.view((float, len(self.names)))
except:
pass
def __repr__(self):
return str(self.__class__)
def process_recarray(data, endog_idx=0, exog_idx=None, stack=True, dtype=None):
names = list(data.dtype.names)
if isinstance(endog_idx, int):
endog = array(data[names[endog_idx]], dtype=dtype)
endog_name = names[endog_idx]
endog_idx = [endog_idx]
else:
endog_name = [names[i] for i in endog_idx]
if stack:
endog = np.column_stack(data[field] for field in endog_name)
else:
endog = data[endog_name]
if exog_idx is None:
exog_name = [names[i] for i in xrange(len(names))
if i not in endog_idx]
else:
exog_name = [names[i] for i in exog_idx]
if stack:
exog = np.column_stack(data[field] for field in exog_name)
else:
exog = data[exog_name]
if dtype:
endog = endog.astype(dtype)
exog = exog.astype(dtype)
dataset = Dataset(data=data, names=names, endog=endog, exog=exog,
endog_name=endog_name, exog_name=exog_name)
return dataset
def process_recarray_pandas(data, endog_idx=0, exog_idx=None, dtype=None):
from pandas import DataFrame
data = DataFrame(data, dtype=dtype)
names = data.columns
if isinstance(endog_idx, int):
endog_name = names[endog_idx]
endog = data[endog_name]
if exog_idx is None:
exog = data.drop([endog_name], axis=1)
else:
exog = data.filter(names[exog_idx])
else:
endog = data.ix[:, endog_idx]
endog_name = list(endog.columns)
if exog_idx is None:
exog = data.drop(endog_name, axis=1)
elif isinstance(exog_idx, int):
exog = data.filter([names[exog_idx]])
else:
exog = data.filter(names[exog_idx])
exog_name = list(exog.columns)
dataset = Dataset(data=data, names=list(names), endog=endog, exog=exog,
endog_name=endog_name, exog_name=exog_name)
return dataset
|