M-Estimators for Robust Linear ModelingΒΆ
In [1]:
from __future__ import print_function
from statsmodels.compat import lmap
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import statsmodels.api as sm
- An M-estimator minimizes the function
$$Q(e_i, \rho) = \sum_i~\rho \left (\frac{e_i}{s}\right )$$
where $\rho$ is a symmetric function of the residuals
- The effect of $\rho$ is to reduce the influence of outliers
- $s$ is an estimate of scale.
- The robust estimates $\hat{\beta}$ are computed by the iteratively re-weighted least squares algorithm
- We have several choices available for the weighting functions to be used
In [2]:
norms = sm.robust.norms
In [3]:
def plot_weights(support, weights_func, xlabels, xticks):
fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(111)
ax.plot(support, weights_func(support))
ax.set_xticks(xticks)
ax.set_xticklabels(xlabels, fontsize=16)
ax.set_ylim(-.1, 1.1)
return ax
Andrew's Wave¶
In [4]:
help(norms.AndrewWave.weights)
Help on method weights in module statsmodels.robust.norms: weights(self, z) unbound statsmodels.robust.norms.AndrewWave method Andrew's wave weighting function for the IRLS algorithm The psi function scaled by z Parameters ---------- z : array-like 1d array Returns ------- weights : array weights(z) = sin(z/a)/(z/a) for \|z\| <= a*pi weights(z) = 0 for \|z\| > a*pi
In [5]:
a = 1.339
support = np.linspace(-np.pi*a, np.pi*a, 100)
andrew = norms.AndrewWave(a=a)
plot_weights(support, andrew.weights, ['$-\pi*a$', '0', '$\pi*a$'], [-np.pi*a, 0, np.pi*a]);
Error in callback <function post_execute at 0x7f89c987e578> (for post_execute):
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) /usr/lib/python2.7/dist-packages/matplotlib/pyplot.pyc in post_execute() 145 def post_execute(): 146 if matplotlib.is_interactive(): --> 147 draw_all() 148 149 # IPython >= 2 /usr/lib/python2.7/dist-packages/matplotlib/_pylab_helpers.pyc in draw_all(cls, force) 148 for f_mgr in cls.get_all_fig_managers(): 149 if force or f_mgr.canvas.figure.stale: --> 150 f_mgr.canvas.draw_idle() 151 152 atexit.register(Gcf.destroy_all) /usr/lib/python2.7/dist-packages/matplotlib/backend_bases.pyc in draw_idle(self, *args, **kwargs) 2024 if not self._is_idle_drawing: 2025 with self._idle_draw_cntx(): -> 2026 self.draw(*args, **kwargs) 2027 2028 def draw_cursor(self, event): /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in draw(self) 472 473 try: --> 474 self.figure.draw(self.renderer) 475 finally: 476 RendererAgg.lock.release() /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/figure.pyc in draw(self, renderer) 1157 dsu.sort(key=itemgetter(0)) 1158 for zorder, a, func, args in dsu: -> 1159 func(*args) 1160 1161 renderer.close_group('figure') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axes/_base.pyc in draw(self, renderer, inframe) 2322 2323 for zorder, a in dsu: -> 2324 a.draw(renderer) 2325 2326 renderer.close_group('axes') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in draw(self, renderer, *args, **kwargs) 1106 ticks_to_draw = self._update_ticks(renderer) 1107 ticklabelBoxes, ticklabelBoxes2 = self._get_tick_bboxes(ticks_to_draw, -> 1108 renderer) 1109 1110 for tick in ticks_to_draw: /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in _get_tick_bboxes(self, ticks, renderer) 1056 for tick in ticks: 1057 if tick.label1On and tick.label1.get_visible(): -> 1058 extent = tick.label1.get_window_extent(renderer) 1059 ticklabelBoxes.append(extent) 1060 if tick.label2On and tick.label2.get_visible(): /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in get_window_extent(self, renderer, dpi) 959 raise RuntimeError('Cannot get window extent w/o renderer') 960 --> 961 bbox, info, descent = self._get_layout(self._renderer) 962 x, y = self.get_unitless_position() 963 x, y = self.get_transform().transform_point((x, y)) /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in _get_layout(self, renderer) 350 tmp, lp_h, lp_bl = renderer.get_text_width_height_descent('lp', 351 self._fontproperties, --> 352 ismath=False) 353 offsety = (lp_h - lp_bl) * self._linespacing 354 /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in get_text_width_height_descent(self, s, prop, ismath) 227 fontsize = prop.get_size_in_points() 228 w, h, d = texmanager.get_text_width_height_descent(s, fontsize, --> 229 renderer=self) 230 return w, h, d 231 /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in get_text_width_height_descent(self, tex, fontsize, renderer) 673 else: 674 # use dviread. It sometimes returns a wrong descent. --> 675 dvifile = self.make_dvi(tex, fontsize) 676 dvi = dviread.Dvi(dvifile, 72 * dpi_fraction) 677 try: /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in make_dvi(self, tex, fontsize) 420 'string:\n%s\nHere is the full report generated by ' 421 'LaTeX: \n\n' % repr(tex.encode('unicode_escape')) + --> 422 report)) 423 else: 424 mpl.verbose.report(report, 'debug') RuntimeError: LaTeX was not able to process the following string: 'lp' Here is the full report generated by LaTeX:
<matplotlib.figure.Figure at 0x7f89b678fb90>
Hampel's 17A¶
In [6]:
help(norms.Hampel.weights)
Help on method weights in module statsmodels.robust.norms: weights(self, z) unbound statsmodels.robust.norms.Hampel method Hampel weighting function for the IRLS algorithm The psi function scaled by z Parameters ---------- z : array-like 1d array Returns ------- weights : array weights(z) = 1 for \|z\| <= a weights(z) = a/\|z\| for a < \|z\| <= b weights(z) = a*(c - \|z\|)/(\|z\|*(c-b)) for b < \|z\| <= c weights(z) = 0 for \|z\| > c
In [7]:
c = 8
support = np.linspace(-3*c, 3*c, 1000)
hampel = norms.Hampel(a=2., b=4., c=c)
plot_weights(support, hampel.weights, ['3*c', '0', '3*c'], [-3*c, 0, 3*c]);
Error in callback <function post_execute at 0x7f89c987e578> (for post_execute):
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) /usr/lib/python2.7/dist-packages/matplotlib/pyplot.pyc in post_execute() 145 def post_execute(): 146 if matplotlib.is_interactive(): --> 147 draw_all() 148 149 # IPython >= 2 /usr/lib/python2.7/dist-packages/matplotlib/_pylab_helpers.pyc in draw_all(cls, force) 148 for f_mgr in cls.get_all_fig_managers(): 149 if force or f_mgr.canvas.figure.stale: --> 150 f_mgr.canvas.draw_idle() 151 152 atexit.register(Gcf.destroy_all) /usr/lib/python2.7/dist-packages/matplotlib/backend_bases.pyc in draw_idle(self, *args, **kwargs) 2024 if not self._is_idle_drawing: 2025 with self._idle_draw_cntx(): -> 2026 self.draw(*args, **kwargs) 2027 2028 def draw_cursor(self, event): /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in draw(self) 472 473 try: --> 474 self.figure.draw(self.renderer) 475 finally: 476 RendererAgg.lock.release() /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/figure.pyc in draw(self, renderer) 1157 dsu.sort(key=itemgetter(0)) 1158 for zorder, a, func, args in dsu: -> 1159 func(*args) 1160 1161 renderer.close_group('figure') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axes/_base.pyc in draw(self, renderer, inframe) 2322 2323 for zorder, a in dsu: -> 2324 a.draw(renderer) 2325 2326 renderer.close_group('axes') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in draw(self, renderer, *args, **kwargs) 1106 ticks_to_draw = self._update_ticks(renderer) 1107 ticklabelBoxes, ticklabelBoxes2 = self._get_tick_bboxes(ticks_to_draw, -> 1108 renderer) 1109 1110 for tick in ticks_to_draw: /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in _get_tick_bboxes(self, ticks, renderer) 1056 for tick in ticks: 1057 if tick.label1On and tick.label1.get_visible(): -> 1058 extent = tick.label1.get_window_extent(renderer) 1059 ticklabelBoxes.append(extent) 1060 if tick.label2On and tick.label2.get_visible(): /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in get_window_extent(self, renderer, dpi) 959 raise RuntimeError('Cannot get window extent w/o renderer') 960 --> 961 bbox, info, descent = self._get_layout(self._renderer) 962 x, y = self.get_unitless_position() 963 x, y = self.get_transform().transform_point((x, y)) /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in _get_layout(self, renderer) 350 tmp, lp_h, lp_bl = renderer.get_text_width_height_descent('lp', 351 self._fontproperties, --> 352 ismath=False) 353 offsety = (lp_h - lp_bl) * self._linespacing 354 /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in get_text_width_height_descent(self, s, prop, ismath) 227 fontsize = prop.get_size_in_points() 228 w, h, d = texmanager.get_text_width_height_descent(s, fontsize, --> 229 renderer=self) 230 return w, h, d 231 /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in get_text_width_height_descent(self, tex, fontsize, renderer) 673 else: 674 # use dviread. It sometimes returns a wrong descent. --> 675 dvifile = self.make_dvi(tex, fontsize) 676 dvi = dviread.Dvi(dvifile, 72 * dpi_fraction) 677 try: /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in make_dvi(self, tex, fontsize) 420 'string:\n%s\nHere is the full report generated by ' 421 'LaTeX: \n\n' % repr(tex.encode('unicode_escape')) + --> 422 report)) 423 else: 424 mpl.verbose.report(report, 'debug') RuntimeError: LaTeX was not able to process the following string: 'lp' Here is the full report generated by LaTeX:
<matplotlib.figure.Figure at 0x7f89b67e8e10>
Huber's t¶
In [8]:
help(norms.HuberT.weights)
Help on method weights in module statsmodels.robust.norms: weights(self, z) unbound statsmodels.robust.norms.HuberT method Huber's t weighting function for the IRLS algorithm The psi function scaled by z Parameters ---------- z : array-like 1d array Returns ------- weights : array weights(z) = 1 for \|z\| <= t weights(z) = t/\|z\| for \|z\| > t
In [9]:
t = 1.345
support = np.linspace(-3*t, 3*t, 1000)
huber = norms.HuberT(t=t)
plot_weights(support, huber.weights, ['-3*t', '0', '3*t'], [-3*t, 0, 3*t]);
Error in callback <function post_execute at 0x7f89c987e578> (for post_execute):
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) /usr/lib/python2.7/dist-packages/matplotlib/pyplot.pyc in post_execute() 145 def post_execute(): 146 if matplotlib.is_interactive(): --> 147 draw_all() 148 149 # IPython >= 2 /usr/lib/python2.7/dist-packages/matplotlib/_pylab_helpers.pyc in draw_all(cls, force) 148 for f_mgr in cls.get_all_fig_managers(): 149 if force or f_mgr.canvas.figure.stale: --> 150 f_mgr.canvas.draw_idle() 151 152 atexit.register(Gcf.destroy_all) /usr/lib/python2.7/dist-packages/matplotlib/backend_bases.pyc in draw_idle(self, *args, **kwargs) 2024 if not self._is_idle_drawing: 2025 with self._idle_draw_cntx(): -> 2026 self.draw(*args, **kwargs) 2027 2028 def draw_cursor(self, event): /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in draw(self) 472 473 try: --> 474 self.figure.draw(self.renderer) 475 finally: 476 RendererAgg.lock.release() /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/figure.pyc in draw(self, renderer) 1157 dsu.sort(key=itemgetter(0)) 1158 for zorder, a, func, args in dsu: -> 1159 func(*args) 1160 1161 renderer.close_group('figure') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axes/_base.pyc in draw(self, renderer, inframe) 2322 2323 for zorder, a in dsu: -> 2324 a.draw(renderer) 2325 2326 renderer.close_group('axes') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in draw(self, renderer, *args, **kwargs) 1106 ticks_to_draw = self._update_ticks(renderer) 1107 ticklabelBoxes, ticklabelBoxes2 = self._get_tick_bboxes(ticks_to_draw, -> 1108 renderer) 1109 1110 for tick in ticks_to_draw: /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in _get_tick_bboxes(self, ticks, renderer) 1056 for tick in ticks: 1057 if tick.label1On and tick.label1.get_visible(): -> 1058 extent = tick.label1.get_window_extent(renderer) 1059 ticklabelBoxes.append(extent) 1060 if tick.label2On and tick.label2.get_visible(): /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in get_window_extent(self, renderer, dpi) 959 raise RuntimeError('Cannot get window extent w/o renderer') 960 --> 961 bbox, info, descent = self._get_layout(self._renderer) 962 x, y = self.get_unitless_position() 963 x, y = self.get_transform().transform_point((x, y)) /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in _get_layout(self, renderer) 350 tmp, lp_h, lp_bl = renderer.get_text_width_height_descent('lp', 351 self._fontproperties, --> 352 ismath=False) 353 offsety = (lp_h - lp_bl) * self._linespacing 354 /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in get_text_width_height_descent(self, s, prop, ismath) 227 fontsize = prop.get_size_in_points() 228 w, h, d = texmanager.get_text_width_height_descent(s, fontsize, --> 229 renderer=self) 230 return w, h, d 231 /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in get_text_width_height_descent(self, tex, fontsize, renderer) 673 else: 674 # use dviread. It sometimes returns a wrong descent. --> 675 dvifile = self.make_dvi(tex, fontsize) 676 dvi = dviread.Dvi(dvifile, 72 * dpi_fraction) 677 try: /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in make_dvi(self, tex, fontsize) 420 'string:\n%s\nHere is the full report generated by ' 421 'LaTeX: \n\n' % repr(tex.encode('unicode_escape')) + --> 422 report)) 423 else: 424 mpl.verbose.report(report, 'debug') RuntimeError: LaTeX was not able to process the following string: 'lp' Here is the full report generated by LaTeX:
<matplotlib.figure.Figure at 0x7f89b66335d0>
Least Squares¶
In [10]:
help(norms.LeastSquares.weights)
Help on method weights in module statsmodels.robust.norms: weights(self, z) unbound statsmodels.robust.norms.LeastSquares method The least squares estimator weighting function for the IRLS algorithm. The psi function scaled by the input z Parameters ---------- z : array-like 1d array Returns ------- weights : array weights(z) = np.ones(z.shape)
In [11]:
support = np.linspace(-3, 3, 1000)
lst_sq = norms.LeastSquares()
plot_weights(support, lst_sq.weights, ['-3', '0', '3'], [-3, 0, 3]);
Error in callback <function post_execute at 0x7f89c987e578> (for post_execute):
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) /usr/lib/python2.7/dist-packages/matplotlib/pyplot.pyc in post_execute() 145 def post_execute(): 146 if matplotlib.is_interactive(): --> 147 draw_all() 148 149 # IPython >= 2 /usr/lib/python2.7/dist-packages/matplotlib/_pylab_helpers.pyc in draw_all(cls, force) 148 for f_mgr in cls.get_all_fig_managers(): 149 if force or f_mgr.canvas.figure.stale: --> 150 f_mgr.canvas.draw_idle() 151 152 atexit.register(Gcf.destroy_all) /usr/lib/python2.7/dist-packages/matplotlib/backend_bases.pyc in draw_idle(self, *args, **kwargs) 2024 if not self._is_idle_drawing: 2025 with self._idle_draw_cntx(): -> 2026 self.draw(*args, **kwargs) 2027 2028 def draw_cursor(self, event): /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in draw(self) 472 473 try: --> 474 self.figure.draw(self.renderer) 475 finally: 476 RendererAgg.lock.release() /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/figure.pyc in draw(self, renderer) 1157 dsu.sort(key=itemgetter(0)) 1158 for zorder, a, func, args in dsu: -> 1159 func(*args) 1160 1161 renderer.close_group('figure') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axes/_base.pyc in draw(self, renderer, inframe) 2322 2323 for zorder, a in dsu: -> 2324 a.draw(renderer) 2325 2326 renderer.close_group('axes') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in draw(self, renderer, *args, **kwargs) 1106 ticks_to_draw = self._update_ticks(renderer) 1107 ticklabelBoxes, ticklabelBoxes2 = self._get_tick_bboxes(ticks_to_draw, -> 1108 renderer) 1109 1110 for tick in ticks_to_draw: /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in _get_tick_bboxes(self, ticks, renderer) 1056 for tick in ticks: 1057 if tick.label1On and tick.label1.get_visible(): -> 1058 extent = tick.label1.get_window_extent(renderer) 1059 ticklabelBoxes.append(extent) 1060 if tick.label2On and tick.label2.get_visible(): /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in get_window_extent(self, renderer, dpi) 959 raise RuntimeError('Cannot get window extent w/o renderer') 960 --> 961 bbox, info, descent = self._get_layout(self._renderer) 962 x, y = self.get_unitless_position() 963 x, y = self.get_transform().transform_point((x, y)) /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in _get_layout(self, renderer) 350 tmp, lp_h, lp_bl = renderer.get_text_width_height_descent('lp', 351 self._fontproperties, --> 352 ismath=False) 353 offsety = (lp_h - lp_bl) * self._linespacing 354 /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in get_text_width_height_descent(self, s, prop, ismath) 227 fontsize = prop.get_size_in_points() 228 w, h, d = texmanager.get_text_width_height_descent(s, fontsize, --> 229 renderer=self) 230 return w, h, d 231 /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in get_text_width_height_descent(self, tex, fontsize, renderer) 673 else: 674 # use dviread. It sometimes returns a wrong descent. --> 675 dvifile = self.make_dvi(tex, fontsize) 676 dvi = dviread.Dvi(dvifile, 72 * dpi_fraction) 677 try: /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in make_dvi(self, tex, fontsize) 420 'string:\n%s\nHere is the full report generated by ' 421 'LaTeX: \n\n' % repr(tex.encode('unicode_escape')) + --> 422 report)) 423 else: 424 mpl.verbose.report(report, 'debug') RuntimeError: LaTeX was not able to process the following string: 'lp' Here is the full report generated by LaTeX:
<matplotlib.figure.Figure at 0x7f89b664dd90>
Ramsay's Ea¶
In [12]:
help(norms.RamsayE.weights)
Help on method weights in module statsmodels.robust.norms: weights(self, z) unbound statsmodels.robust.norms.RamsayE method Ramsay's Ea weighting function for the IRLS algorithm The psi function scaled by z Parameters ---------- z : array-like 1d array Returns ------- weights : array weights(z) = exp(-a*\|z\|)
In [13]:
a = .3
support = np.linspace(-3*a, 3*a, 1000)
ramsay = norms.RamsayE(a=a)
plot_weights(support, ramsay.weights, ['-3*a', '0', '3*a'], [-3*a, 0, 3*a]);
Error in callback <function post_execute at 0x7f89c987e578> (for post_execute):
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) /usr/lib/python2.7/dist-packages/matplotlib/pyplot.pyc in post_execute() 145 def post_execute(): 146 if matplotlib.is_interactive(): --> 147 draw_all() 148 149 # IPython >= 2 /usr/lib/python2.7/dist-packages/matplotlib/_pylab_helpers.pyc in draw_all(cls, force) 148 for f_mgr in cls.get_all_fig_managers(): 149 if force or f_mgr.canvas.figure.stale: --> 150 f_mgr.canvas.draw_idle() 151 152 atexit.register(Gcf.destroy_all) /usr/lib/python2.7/dist-packages/matplotlib/backend_bases.pyc in draw_idle(self, *args, **kwargs) 2024 if not self._is_idle_drawing: 2025 with self._idle_draw_cntx(): -> 2026 self.draw(*args, **kwargs) 2027 2028 def draw_cursor(self, event): /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in draw(self) 472 473 try: --> 474 self.figure.draw(self.renderer) 475 finally: 476 RendererAgg.lock.release() /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/figure.pyc in draw(self, renderer) 1157 dsu.sort(key=itemgetter(0)) 1158 for zorder, a, func, args in dsu: -> 1159 func(*args) 1160 1161 renderer.close_group('figure') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axes/_base.pyc in draw(self, renderer, inframe) 2322 2323 for zorder, a in dsu: -> 2324 a.draw(renderer) 2325 2326 renderer.close_group('axes') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in draw(self, renderer, *args, **kwargs) 1106 ticks_to_draw = self._update_ticks(renderer) 1107 ticklabelBoxes, ticklabelBoxes2 = self._get_tick_bboxes(ticks_to_draw, -> 1108 renderer) 1109 1110 for tick in ticks_to_draw: /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in _get_tick_bboxes(self, ticks, renderer) 1056 for tick in ticks: 1057 if tick.label1On and tick.label1.get_visible(): -> 1058 extent = tick.label1.get_window_extent(renderer) 1059 ticklabelBoxes.append(extent) 1060 if tick.label2On and tick.label2.get_visible(): /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in get_window_extent(self, renderer, dpi) 959 raise RuntimeError('Cannot get window extent w/o renderer') 960 --> 961 bbox, info, descent = self._get_layout(self._renderer) 962 x, y = self.get_unitless_position() 963 x, y = self.get_transform().transform_point((x, y)) /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in _get_layout(self, renderer) 350 tmp, lp_h, lp_bl = renderer.get_text_width_height_descent('lp', 351 self._fontproperties, --> 352 ismath=False) 353 offsety = (lp_h - lp_bl) * self._linespacing 354 /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in get_text_width_height_descent(self, s, prop, ismath) 227 fontsize = prop.get_size_in_points() 228 w, h, d = texmanager.get_text_width_height_descent(s, fontsize, --> 229 renderer=self) 230 return w, h, d 231 /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in get_text_width_height_descent(self, tex, fontsize, renderer) 673 else: 674 # use dviread. It sometimes returns a wrong descent. --> 675 dvifile = self.make_dvi(tex, fontsize) 676 dvi = dviread.Dvi(dvifile, 72 * dpi_fraction) 677 try: /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in make_dvi(self, tex, fontsize) 420 'string:\n%s\nHere is the full report generated by ' 421 'LaTeX: \n\n' % repr(tex.encode('unicode_escape')) + --> 422 report)) 423 else: 424 mpl.verbose.report(report, 'debug') RuntimeError: LaTeX was not able to process the following string: 'lp' Here is the full report generated by LaTeX:
<matplotlib.figure.Figure at 0x7f89b68f6610>
Trimmed Mean¶
In [14]:
help(norms.TrimmedMean.weights)
Help on method weights in module statsmodels.robust.norms: weights(self, z) unbound statsmodels.robust.norms.TrimmedMean method Least trimmed mean weighting function for the IRLS algorithm The psi function scaled by z Parameters ---------- z : array-like 1d array Returns ------- weights : array weights(z) = 1 for \|z\| <= c weights(z) = 0 for \|z\| > c
In [15]:
c = 2
support = np.linspace(-3*c, 3*c, 1000)
trimmed = norms.TrimmedMean(c=c)
plot_weights(support, trimmed.weights, ['-3*c', '0', '3*c'], [-3*c, 0, 3*c]);
Error in callback <function post_execute at 0x7f89c987e578> (for post_execute):
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) /usr/lib/python2.7/dist-packages/matplotlib/pyplot.pyc in post_execute() 145 def post_execute(): 146 if matplotlib.is_interactive(): --> 147 draw_all() 148 149 # IPython >= 2 /usr/lib/python2.7/dist-packages/matplotlib/_pylab_helpers.pyc in draw_all(cls, force) 148 for f_mgr in cls.get_all_fig_managers(): 149 if force or f_mgr.canvas.figure.stale: --> 150 f_mgr.canvas.draw_idle() 151 152 atexit.register(Gcf.destroy_all) /usr/lib/python2.7/dist-packages/matplotlib/backend_bases.pyc in draw_idle(self, *args, **kwargs) 2024 if not self._is_idle_drawing: 2025 with self._idle_draw_cntx(): -> 2026 self.draw(*args, **kwargs) 2027 2028 def draw_cursor(self, event): /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in draw(self) 472 473 try: --> 474 self.figure.draw(self.renderer) 475 finally: 476 RendererAgg.lock.release() /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/figure.pyc in draw(self, renderer) 1157 dsu.sort(key=itemgetter(0)) 1158 for zorder, a, func, args in dsu: -> 1159 func(*args) 1160 1161 renderer.close_group('figure') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axes/_base.pyc in draw(self, renderer, inframe) 2322 2323 for zorder, a in dsu: -> 2324 a.draw(renderer) 2325 2326 renderer.close_group('axes') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in draw(self, renderer, *args, **kwargs) 1106 ticks_to_draw = self._update_ticks(renderer) 1107 ticklabelBoxes, ticklabelBoxes2 = self._get_tick_bboxes(ticks_to_draw, -> 1108 renderer) 1109 1110 for tick in ticks_to_draw: /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in _get_tick_bboxes(self, ticks, renderer) 1056 for tick in ticks: 1057 if tick.label1On and tick.label1.get_visible(): -> 1058 extent = tick.label1.get_window_extent(renderer) 1059 ticklabelBoxes.append(extent) 1060 if tick.label2On and tick.label2.get_visible(): /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in get_window_extent(self, renderer, dpi) 959 raise RuntimeError('Cannot get window extent w/o renderer') 960 --> 961 bbox, info, descent = self._get_layout(self._renderer) 962 x, y = self.get_unitless_position() 963 x, y = self.get_transform().transform_point((x, y)) /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in _get_layout(self, renderer) 350 tmp, lp_h, lp_bl = renderer.get_text_width_height_descent('lp', 351 self._fontproperties, --> 352 ismath=False) 353 offsety = (lp_h - lp_bl) * self._linespacing 354 /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in get_text_width_height_descent(self, s, prop, ismath) 227 fontsize = prop.get_size_in_points() 228 w, h, d = texmanager.get_text_width_height_descent(s, fontsize, --> 229 renderer=self) 230 return w, h, d 231 /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in get_text_width_height_descent(self, tex, fontsize, renderer) 673 else: 674 # use dviread. It sometimes returns a wrong descent. --> 675 dvifile = self.make_dvi(tex, fontsize) 676 dvi = dviread.Dvi(dvifile, 72 * dpi_fraction) 677 try: /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in make_dvi(self, tex, fontsize) 420 'string:\n%s\nHere is the full report generated by ' 421 'LaTeX: \n\n' % repr(tex.encode('unicode_escape')) + --> 422 report)) 423 else: 424 mpl.verbose.report(report, 'debug') RuntimeError: LaTeX was not able to process the following string: 'lp' Here is the full report generated by LaTeX:
<matplotlib.figure.Figure at 0x7f89b6a8e250>
Tukey's Biweight¶
In [16]:
help(norms.TukeyBiweight.weights)
Help on method weights in module statsmodels.robust.norms: weights(self, z) unbound statsmodels.robust.norms.TukeyBiweight method Tukey's biweight weighting function for the IRLS algorithm The psi function scaled by z Parameters ---------- z : array-like 1d array Returns ------- weights : array psi(z) = (1 - (z/c)**2)**2 for \|z\| <= R psi(z) = 0 for \|z\| > R
In [17]:
c = 4.685
support = np.linspace(-3*c, 3*c, 1000)
tukey = norms.TukeyBiweight(c=c)
plot_weights(support, tukey.weights, ['-3*c', '0', '3*c'], [-3*c, 0, 3*c]);
Error in callback <function post_execute at 0x7f89c987e578> (for post_execute):
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) /usr/lib/python2.7/dist-packages/matplotlib/pyplot.pyc in post_execute() 145 def post_execute(): 146 if matplotlib.is_interactive(): --> 147 draw_all() 148 149 # IPython >= 2 /usr/lib/python2.7/dist-packages/matplotlib/_pylab_helpers.pyc in draw_all(cls, force) 148 for f_mgr in cls.get_all_fig_managers(): 149 if force or f_mgr.canvas.figure.stale: --> 150 f_mgr.canvas.draw_idle() 151 152 atexit.register(Gcf.destroy_all) /usr/lib/python2.7/dist-packages/matplotlib/backend_bases.pyc in draw_idle(self, *args, **kwargs) 2024 if not self._is_idle_drawing: 2025 with self._idle_draw_cntx(): -> 2026 self.draw(*args, **kwargs) 2027 2028 def draw_cursor(self, event): /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in draw(self) 472 473 try: --> 474 self.figure.draw(self.renderer) 475 finally: 476 RendererAgg.lock.release() /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/figure.pyc in draw(self, renderer) 1157 dsu.sort(key=itemgetter(0)) 1158 for zorder, a, func, args in dsu: -> 1159 func(*args) 1160 1161 renderer.close_group('figure') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axes/_base.pyc in draw(self, renderer, inframe) 2322 2323 for zorder, a in dsu: -> 2324 a.draw(renderer) 2325 2326 renderer.close_group('axes') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in draw(self, renderer, *args, **kwargs) 1106 ticks_to_draw = self._update_ticks(renderer) 1107 ticklabelBoxes, ticklabelBoxes2 = self._get_tick_bboxes(ticks_to_draw, -> 1108 renderer) 1109 1110 for tick in ticks_to_draw: /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in _get_tick_bboxes(self, ticks, renderer) 1056 for tick in ticks: 1057 if tick.label1On and tick.label1.get_visible(): -> 1058 extent = tick.label1.get_window_extent(renderer) 1059 ticklabelBoxes.append(extent) 1060 if tick.label2On and tick.label2.get_visible(): /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in get_window_extent(self, renderer, dpi) 959 raise RuntimeError('Cannot get window extent w/o renderer') 960 --> 961 bbox, info, descent = self._get_layout(self._renderer) 962 x, y = self.get_unitless_position() 963 x, y = self.get_transform().transform_point((x, y)) /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in _get_layout(self, renderer) 350 tmp, lp_h, lp_bl = renderer.get_text_width_height_descent('lp', 351 self._fontproperties, --> 352 ismath=False) 353 offsety = (lp_h - lp_bl) * self._linespacing 354 /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in get_text_width_height_descent(self, s, prop, ismath) 227 fontsize = prop.get_size_in_points() 228 w, h, d = texmanager.get_text_width_height_descent(s, fontsize, --> 229 renderer=self) 230 return w, h, d 231 /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in get_text_width_height_descent(self, tex, fontsize, renderer) 673 else: 674 # use dviread. It sometimes returns a wrong descent. --> 675 dvifile = self.make_dvi(tex, fontsize) 676 dvi = dviread.Dvi(dvifile, 72 * dpi_fraction) 677 try: /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in make_dvi(self, tex, fontsize) 420 'string:\n%s\nHere is the full report generated by ' 421 'LaTeX: \n\n' % repr(tex.encode('unicode_escape')) + --> 422 report)) 423 else: 424 mpl.verbose.report(report, 'debug') RuntimeError: LaTeX was not able to process the following string: 'lp' Here is the full report generated by LaTeX:
<matplotlib.figure.Figure at 0x7f89b755b150>
Scale Estimators¶
- Robust estimates of the location
In [18]:
x = np.array([1, 2, 3, 4, 500])
- The mean is not a robust estimator of location
In [19]:
x.mean()
Out[19]:
102.0
- The median, on the other hand, is a robust estimator with a breakdown point of 50%
In [20]:
np.median(x)
Out[20]:
3.0
- Analagously for the scale
- The standard deviation is not robust
In [21]:
x.std()
Out[21]:
199.00251254695254
Median Absolute Deviation
$$ median_i |X_i - median_j(X_j)|) $$
Standardized Median Absolute Deviation is a consistent estimator for $\hat{\sigma}$
$$\hat{\sigma}=K \cdot MAD$$
where $K$ depends on the distribution. For the normal distribution for example,
$$K = \Phi^{-1}(.75)$$
In [22]:
stats.norm.ppf(.75)
Out[22]:
0.67448975019608171
In [23]:
print(x)
[ 1 2 3 4 500]
In [24]:
sm.robust.scale.stand_mad(x)
/build/statsmodels-0.6.1/debian/python-statsmodels/usr/lib/python2.7/dist-packages/statsmodels/robust/scale.py:49: FutureWarning: stand_mad is deprecated and will be removed in 0.7.0. Use mad instead. "instead.", FutureWarning)
Out[24]:
1.482602218505602
In [25]:
np.array([1,2,3,4,5.]).std()
Out[25]:
1.4142135623730951
- The default for Robust Linear Models is MAD
- another popular choice is Huber's proposal 2
In [26]:
np.random.seed(12345)
fat_tails = stats.t(6).rvs(40)
In [27]:
kde = sm.nonparametric.KDE(fat_tails)
kde.fit()
fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(111)
ax.plot(kde.support, kde.density);
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-315-605f8dff1422> in <module>() ----> 1 kde = sm.nonparametric.KDE(fat_tails) 2 kde.fit() 3 fig = plt.figure(figsize=(12,8)) 4 ax = fig.add_subplot(111) 5 ax.plot(kde.support, kde.density); AttributeError: 'module' object has no attribute 'KDE'
In [28]:
print(fat_tails.mean(), fat_tails.std())
0.0688231044811 1.34716332297
In [29]:
print(stats.norm.fit(fat_tails))
(0.068823104481087499, 1.3471633229698652)
In [30]:
print(stats.t.fit(fat_tails, f0=6))
(6, 0.039009187170278181, 1.0564230978488927)
In [31]:
huber = sm.robust.scale.Huber()
loc, scale = huber(fat_tails)
print(loc, scale)
0.0404898433327 1.15571400476
In [32]:
sm.robust.stand_mad(fat_tails)
Out[32]:
1.1153350011654151
In [33]:
sm.robust.stand_mad(fat_tails, c=stats.t(6).ppf(.75))
Out[33]:
1.0483916565928972
In [34]:
sm.robust.scale.mad(fat_tails)
Out[34]:
1.1153350011654151
Duncan's Occupational Prestige data - M-estimation for outliers¶
In [35]:
from statsmodels.graphics.api import abline_plot
from statsmodels.formula.api import ols, rlm
In [36]:
prestige = sm.datasets.get_rdataset("Duncan", "car", cache=True).data
--------------------------------------------------------------------------- URLError Traceback (most recent call last) <ipython-input-324-d85a43c9ce16> in <module>() ----> 1 prestige = sm.datasets.get_rdataset("Duncan", "car", cache=True).data /build/statsmodels-0.6.1/debian/python-statsmodels/usr/lib/python2.7/dist-packages/statsmodels/datasets/utils.pyc in get_rdataset(dataname, package, cache) 284 "master/doc/"+package+"/rst/") 285 cache = _get_cache(cache) --> 286 data, from_cache = _get_data(data_base_url, dataname, cache) 287 data = read_csv(data, index_col=0) 288 data = _maybe_reset_index(data) /build/statsmodels-0.6.1/debian/python-statsmodels/usr/lib/python2.7/dist-packages/statsmodels/datasets/utils.pyc in _get_data(base_url, dataname, cache, extension) 215 url = base_url + (dataname + ".%s") % extension 216 try: --> 217 data, from_cache = _urlopen_cached(url, cache) 218 except HTTPError as err: 219 if '404' in str(err): /build/statsmodels-0.6.1/debian/python-statsmodels/usr/lib/python2.7/dist-packages/statsmodels/datasets/utils.pyc in _urlopen_cached(url, cache) 206 # not using the cache or didn't find it in cache 207 if not from_cache: --> 208 data = urlopen(url).read() 209 if cache is not None: # then put it in the cache 210 _cache_it(data, cache_path) /usr/lib/python2.7/urllib2.pyc in urlopen(url, data, timeout, cafile, capath, cadefault, context) 152 else: 153 opener = _opener --> 154 return opener.open(url, data, timeout) 155 156 def install_opener(opener): /usr/lib/python2.7/urllib2.pyc in open(self, fullurl, data, timeout) 427 req = meth(req) 428 --> 429 response = self._open(req, data) 430 431 # post-process response /usr/lib/python2.7/urllib2.pyc in _open(self, req, data) 445 protocol = req.get_type() 446 result = self._call_chain(self.handle_open, protocol, protocol + --> 447 '_open', req) 448 if result: 449 return result /usr/lib/python2.7/urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args) 405 func = getattr(handler, meth_name) 406 --> 407 result = func(*args) 408 if result is not None: 409 return result /usr/lib/python2.7/urllib2.pyc in https_open(self, req) 1239 def https_open(self, req): 1240 return self.do_open(httplib.HTTPSConnection, req, -> 1241 context=self._context) 1242 1243 https_request = AbstractHTTPHandler.do_request_ /usr/lib/python2.7/urllib2.pyc in do_open(self, http_class, req, **http_conn_args) 1196 except socket.error, err: # XXX what error? 1197 h.close() -> 1198 raise URLError(err) 1199 else: 1200 try: URLError: <urlopen error [Errno -3] Temporary failure in name resolution>
In [37]:
print(prestige.head(10))
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-325-d3ced65c396c> in <module>() ----> 1 print(prestige.head(10)) NameError: name 'prestige' is not defined
In [38]:
fig = plt.figure(figsize=(12,12))
ax1 = fig.add_subplot(211, xlabel='Income', ylabel='Prestige')
ax1.scatter(prestige.income, prestige.prestige)
xy_outlier = prestige.ix['minister'][['income','prestige']]
ax1.annotate('Minister', xy_outlier, xy_outlier+1, fontsize=16)
ax2 = fig.add_subplot(212, xlabel='Education',
ylabel='Prestige')
ax2.scatter(prestige.education, prestige.prestige);
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-326-ab347cc850d5> in <module>() 1 fig = plt.figure(figsize=(12,12)) 2 ax1 = fig.add_subplot(211, xlabel='Income', ylabel='Prestige') ----> 3 ax1.scatter(prestige.income, prestige.prestige) 4 xy_outlier = prestige.ix['minister'][['income','prestige']] 5 ax1.annotate('Minister', xy_outlier, xy_outlier+1, fontsize=16) NameError: name 'prestige' is not defined
Error in callback <function post_execute at 0x7f89c987e578> (for post_execute):
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) /usr/lib/python2.7/dist-packages/matplotlib/pyplot.pyc in post_execute() 145 def post_execute(): 146 if matplotlib.is_interactive(): --> 147 draw_all() 148 149 # IPython >= 2 /usr/lib/python2.7/dist-packages/matplotlib/_pylab_helpers.pyc in draw_all(cls, force) 148 for f_mgr in cls.get_all_fig_managers(): 149 if force or f_mgr.canvas.figure.stale: --> 150 f_mgr.canvas.draw_idle() 151 152 atexit.register(Gcf.destroy_all) /usr/lib/python2.7/dist-packages/matplotlib/backend_bases.pyc in draw_idle(self, *args, **kwargs) 2024 if not self._is_idle_drawing: 2025 with self._idle_draw_cntx(): -> 2026 self.draw(*args, **kwargs) 2027 2028 def draw_cursor(self, event): /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in draw(self) 472 473 try: --> 474 self.figure.draw(self.renderer) 475 finally: 476 RendererAgg.lock.release() /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/figure.pyc in draw(self, renderer) 1157 dsu.sort(key=itemgetter(0)) 1158 for zorder, a, func, args in dsu: -> 1159 func(*args) 1160 1161 renderer.close_group('figure') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axes/_base.pyc in draw(self, renderer, inframe) 2322 2323 for zorder, a in dsu: -> 2324 a.draw(renderer) 2325 2326 renderer.close_group('axes') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in draw(self, renderer, *args, **kwargs) 1106 ticks_to_draw = self._update_ticks(renderer) 1107 ticklabelBoxes, ticklabelBoxes2 = self._get_tick_bboxes(ticks_to_draw, -> 1108 renderer) 1109 1110 for tick in ticks_to_draw: /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in _get_tick_bboxes(self, ticks, renderer) 1056 for tick in ticks: 1057 if tick.label1On and tick.label1.get_visible(): -> 1058 extent = tick.label1.get_window_extent(renderer) 1059 ticklabelBoxes.append(extent) 1060 if tick.label2On and tick.label2.get_visible(): /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in get_window_extent(self, renderer, dpi) 959 raise RuntimeError('Cannot get window extent w/o renderer') 960 --> 961 bbox, info, descent = self._get_layout(self._renderer) 962 x, y = self.get_unitless_position() 963 x, y = self.get_transform().transform_point((x, y)) /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in _get_layout(self, renderer) 350 tmp, lp_h, lp_bl = renderer.get_text_width_height_descent('lp', 351 self._fontproperties, --> 352 ismath=False) 353 offsety = (lp_h - lp_bl) * self._linespacing 354 /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in get_text_width_height_descent(self, s, prop, ismath) 227 fontsize = prop.get_size_in_points() 228 w, h, d = texmanager.get_text_width_height_descent(s, fontsize, --> 229 renderer=self) 230 return w, h, d 231 /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in get_text_width_height_descent(self, tex, fontsize, renderer) 673 else: 674 # use dviread. It sometimes returns a wrong descent. --> 675 dvifile = self.make_dvi(tex, fontsize) 676 dvi = dviread.Dvi(dvifile, 72 * dpi_fraction) 677 try: /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in make_dvi(self, tex, fontsize) 420 'string:\n%s\nHere is the full report generated by ' 421 'LaTeX: \n\n' % repr(tex.encode('unicode_escape')) + --> 422 report)) 423 else: 424 mpl.verbose.report(report, 'debug') RuntimeError: LaTeX was not able to process the following string: 'lp' Here is the full report generated by LaTeX:
<matplotlib.figure.Figure at 0x7f89b6a8e8d0>
In [39]:
ols_model = ols('prestige ~ income + education', prestige).fit()
print(ols_model.summary())
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-327-94340cd202f8> in <module>() ----> 1 ols_model = ols('prestige ~ income + education', prestige).fit() 2 print(ols_model.summary()) NameError: name 'prestige' is not defined
In [40]:
infl = ols_model.get_influence()
student = infl.summary_frame()['student_resid']
print(student)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-328-762835c5010b> in <module>() ----> 1 infl = ols_model.get_influence() 2 student = infl.summary_frame()['student_resid'] 3 print(student) NameError: name 'ols_model' is not defined
In [41]:
print(student.ix[np.abs(student) > 2])
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-329-4de70e5d7415> in <module>() ----> 1 print(student.ix[np.abs(student) > 2]) NameError: name 'student' is not defined
In [42]:
print(infl.summary_frame().ix['minister'])
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-330-1b2a9fb1d022> in <module>() ----> 1 print(infl.summary_frame().ix['minister']) NameError: name 'infl' is not defined
In [43]:
sidak = ols_model.outlier_test('sidak')
sidak.sort('unadj_p', inplace=True)
print(sidak)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-331-88381d4a1ea2> in <module>() ----> 1 sidak = ols_model.outlier_test('sidak') 2 sidak.sort('unadj_p', inplace=True) 3 print(sidak) NameError: name 'ols_model' is not defined
In [44]:
fdr = ols_model.outlier_test('fdr_bh')
fdr.sort('unadj_p', inplace=True)
print(fdr)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-332-69e9cb113222> in <module>() ----> 1 fdr = ols_model.outlier_test('fdr_bh') 2 fdr.sort('unadj_p', inplace=True) 3 print(fdr) NameError: name 'ols_model' is not defined
In [45]:
rlm_model = rlm('prestige ~ income + education', prestige).fit()
print(rlm_model.summary())
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-333-f16d19a78b97> in <module>() ----> 1 rlm_model = rlm('prestige ~ income + education', prestige).fit() 2 print(rlm_model.summary()) NameError: name 'prestige' is not defined
In [46]:
print(rlm_model.weights)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-334-2a0e93853fea> in <module>() ----> 1 print(rlm_model.weights) NameError: name 'rlm_model' is not defined
Hertzprung Russell data for Star Cluster CYG 0B1 - Leverage Points¶
- Data is on the luminosity and temperature of 47 stars in the direction of Cygnus.
In [47]:
dta = sm.datasets.get_rdataset("starsCYG", "robustbase", cache=True).data
--------------------------------------------------------------------------- URLError Traceback (most recent call last) <ipython-input-335-4880a5abdb55> in <module>() ----> 1 dta = sm.datasets.get_rdataset("starsCYG", "robustbase", cache=True).data /build/statsmodels-0.6.1/debian/python-statsmodels/usr/lib/python2.7/dist-packages/statsmodels/datasets/utils.pyc in get_rdataset(dataname, package, cache) 284 "master/doc/"+package+"/rst/") 285 cache = _get_cache(cache) --> 286 data, from_cache = _get_data(data_base_url, dataname, cache) 287 data = read_csv(data, index_col=0) 288 data = _maybe_reset_index(data) /build/statsmodels-0.6.1/debian/python-statsmodels/usr/lib/python2.7/dist-packages/statsmodels/datasets/utils.pyc in _get_data(base_url, dataname, cache, extension) 215 url = base_url + (dataname + ".%s") % extension 216 try: --> 217 data, from_cache = _urlopen_cached(url, cache) 218 except HTTPError as err: 219 if '404' in str(err): /build/statsmodels-0.6.1/debian/python-statsmodels/usr/lib/python2.7/dist-packages/statsmodels/datasets/utils.pyc in _urlopen_cached(url, cache) 206 # not using the cache or didn't find it in cache 207 if not from_cache: --> 208 data = urlopen(url).read() 209 if cache is not None: # then put it in the cache 210 _cache_it(data, cache_path) /usr/lib/python2.7/urllib2.pyc in urlopen(url, data, timeout, cafile, capath, cadefault, context) 152 else: 153 opener = _opener --> 154 return opener.open(url, data, timeout) 155 156 def install_opener(opener): /usr/lib/python2.7/urllib2.pyc in open(self, fullurl, data, timeout) 427 req = meth(req) 428 --> 429 response = self._open(req, data) 430 431 # post-process response /usr/lib/python2.7/urllib2.pyc in _open(self, req, data) 445 protocol = req.get_type() 446 result = self._call_chain(self.handle_open, protocol, protocol + --> 447 '_open', req) 448 if result: 449 return result /usr/lib/python2.7/urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args) 405 func = getattr(handler, meth_name) 406 --> 407 result = func(*args) 408 if result is not None: 409 return result /usr/lib/python2.7/urllib2.pyc in https_open(self, req) 1239 def https_open(self, req): 1240 return self.do_open(httplib.HTTPSConnection, req, -> 1241 context=self._context) 1242 1243 https_request = AbstractHTTPHandler.do_request_ /usr/lib/python2.7/urllib2.pyc in do_open(self, http_class, req, **http_conn_args) 1196 except socket.error, err: # XXX what error? 1197 h.close() -> 1198 raise URLError(err) 1199 else: 1200 try: URLError: <urlopen error [Errno -3] Temporary failure in name resolution>
In [48]:
from matplotlib.patches import Ellipse
fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(111, xlabel='log(Temp)', ylabel='log(Light)', title='Hertzsprung-Russell Diagram of Star Cluster CYG OB1')
ax.scatter(*dta.values.T)
# highlight outliers
e = Ellipse((3.5, 6), .2, 1, alpha=.25, color='r')
ax.add_patch(e);
ax.annotate('Red giants', xy=(3.6, 6), xytext=(3.8, 6),
arrowprops=dict(facecolor='black', shrink=0.05, width=2),
horizontalalignment='left', verticalalignment='bottom',
clip_on=True, # clip to the axes bounding box
fontsize=16,
)
# annotate these with their index
for i,row in dta.ix[dta['log.Te'] < 3.8].iterrows():
ax.annotate(i, row, row + .01, fontsize=14)
xlim, ylim = ax.get_xlim(), ax.get_ylim()
-c:4: RuntimeWarning: Tried to set a label via parameter 'y' in func 'scatter' but couldn't find such an argument. (This is a programming error, please report to the matplotlib list!)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-336-d061dc5cc6d1> in <module>() 2 fig = plt.figure(figsize=(12,8)) 3 ax = fig.add_subplot(111, xlabel='log(Temp)', ylabel='log(Light)', title='Hertzsprung-Russell Diagram of Star Cluster CYG OB1') ----> 4 ax.scatter(*dta.values.T) 5 # highlight outliers 6 e = Ellipse((3.5, 6), .2, 1, alpha=.25, color='r') /usr/lib/python2.7/dist-packages/matplotlib/__init__.pyc in inner(ax, *args, **kwargs) 1812 warnings.warn(msg % (label_namer, func.__name__), 1813 RuntimeWarning, stacklevel=2) -> 1814 return func(ax, *args, **kwargs) 1815 pre_doc = inner.__doc__ 1816 if pre_doc is None: TypeError: scatter() takes at least 3 arguments (2 given)
Error in callback <function post_execute at 0x7f89c987e578> (for post_execute):
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) /usr/lib/python2.7/dist-packages/matplotlib/pyplot.pyc in post_execute() 145 def post_execute(): 146 if matplotlib.is_interactive(): --> 147 draw_all() 148 149 # IPython >= 2 /usr/lib/python2.7/dist-packages/matplotlib/_pylab_helpers.pyc in draw_all(cls, force) 148 for f_mgr in cls.get_all_fig_managers(): 149 if force or f_mgr.canvas.figure.stale: --> 150 f_mgr.canvas.draw_idle() 151 152 atexit.register(Gcf.destroy_all) /usr/lib/python2.7/dist-packages/matplotlib/backend_bases.pyc in draw_idle(self, *args, **kwargs) 2024 if not self._is_idle_drawing: 2025 with self._idle_draw_cntx(): -> 2026 self.draw(*args, **kwargs) 2027 2028 def draw_cursor(self, event): /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in draw(self) 472 473 try: --> 474 self.figure.draw(self.renderer) 475 finally: 476 RendererAgg.lock.release() /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/figure.pyc in draw(self, renderer) 1157 dsu.sort(key=itemgetter(0)) 1158 for zorder, a, func, args in dsu: -> 1159 func(*args) 1160 1161 renderer.close_group('figure') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axes/_base.pyc in draw(self, renderer, inframe) 2322 2323 for zorder, a in dsu: -> 2324 a.draw(renderer) 2325 2326 renderer.close_group('axes') /usr/lib/python2.7/dist-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in draw(self, renderer, *args, **kwargs) 1106 ticks_to_draw = self._update_ticks(renderer) 1107 ticklabelBoxes, ticklabelBoxes2 = self._get_tick_bboxes(ticks_to_draw, -> 1108 renderer) 1109 1110 for tick in ticks_to_draw: /usr/lib/python2.7/dist-packages/matplotlib/axis.pyc in _get_tick_bboxes(self, ticks, renderer) 1056 for tick in ticks: 1057 if tick.label1On and tick.label1.get_visible(): -> 1058 extent = tick.label1.get_window_extent(renderer) 1059 ticklabelBoxes.append(extent) 1060 if tick.label2On and tick.label2.get_visible(): /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in get_window_extent(self, renderer, dpi) 959 raise RuntimeError('Cannot get window extent w/o renderer') 960 --> 961 bbox, info, descent = self._get_layout(self._renderer) 962 x, y = self.get_unitless_position() 963 x, y = self.get_transform().transform_point((x, y)) /usr/lib/python2.7/dist-packages/matplotlib/text.pyc in _get_layout(self, renderer) 350 tmp, lp_h, lp_bl = renderer.get_text_width_height_descent('lp', 351 self._fontproperties, --> 352 ismath=False) 353 offsety = (lp_h - lp_bl) * self._linespacing 354 /usr/lib/python2.7/dist-packages/matplotlib/backends/backend_agg.pyc in get_text_width_height_descent(self, s, prop, ismath) 227 fontsize = prop.get_size_in_points() 228 w, h, d = texmanager.get_text_width_height_descent(s, fontsize, --> 229 renderer=self) 230 return w, h, d 231 /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in get_text_width_height_descent(self, tex, fontsize, renderer) 673 else: 674 # use dviread. It sometimes returns a wrong descent. --> 675 dvifile = self.make_dvi(tex, fontsize) 676 dvi = dviread.Dvi(dvifile, 72 * dpi_fraction) 677 try: /usr/lib/python2.7/dist-packages/matplotlib/texmanager.pyc in make_dvi(self, tex, fontsize) 420 'string:\n%s\nHere is the full report generated by ' 421 'LaTeX: \n\n' % repr(tex.encode('unicode_escape')) + --> 422 report)) 423 else: 424 mpl.verbose.report(report, 'debug') RuntimeError: LaTeX was not able to process the following string: 'lp' Here is the full report generated by LaTeX:
<matplotlib.figure.Figure at 0x7f89b73fe9d0>
In [49]:
from IPython.display import Image
Image(filename='star_diagram.png')
Out[49]:
In [50]:
y = dta['log.light']
X = sm.add_constant(dta['log.Te'], prepend=True)
ols_model = sm.OLS(y, X).fit()
abline_plot(model_results=ols_model, ax=ax)
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-338-6c7349bbf59f> in <module>() ----> 1 y = dta['log.light'] 2 X = sm.add_constant(dta['log.Te'], prepend=True) 3 ols_model = sm.OLS(y, X).fit() 4 abline_plot(model_results=ols_model, ax=ax) /usr/lib/python2.7/dist-packages/pandas/core/frame.pyc in __getitem__(self, key) 1993 return self._getitem_multilevel(key) 1994 else: -> 1995 return self._getitem_column(key) 1996 1997 def _getitem_column(self, key): /usr/lib/python2.7/dist-packages/pandas/core/frame.pyc in _getitem_column(self, key) 2000 # get column 2001 if self.columns.is_unique: -> 2002 return self._get_item_cache(key) 2003 2004 # duplicate columns & possible reduce dimensionality /usr/lib/python2.7/dist-packages/pandas/core/generic.pyc in _get_item_cache(self, item) 1343 res = cache.get(item) 1344 if res is None: -> 1345 values = self._data.get(item) 1346 res = self._box_item_values(item, values) 1347 cache[item] = res /usr/lib/python2.7/dist-packages/pandas/core/internals.pyc in get(self, item, fastpath) 3287 3288 if not isnull(item): -> 3289 loc = self.items.get_loc(item) 3290 else: 3291 indexer = np.arange(len(self.items))[isnull(self.items)] /usr/lib/python2.7/dist-packages/pandas/indexes/base.pyc in get_loc(self, key, method, tolerance) 1918 return self._engine.get_loc(key) 1919 except KeyError: -> 1920 return self._engine.get_loc(self._maybe_cast_indexer(key)) 1921 1922 indexer = self.get_indexer([key], method=method, tolerance=tolerance) /usr/lib/python2.7/dist-packages/pandas/index.so in pandas.index.IndexEngine.get_loc (pandas/index.c:4027)() /usr/lib/python2.7/dist-packages/pandas/index.so in pandas.index.IndexEngine.get_loc (pandas/index.c:3891)() /usr/lib/python2.7/dist-packages/pandas/hashtable.so in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12408)() /usr/lib/python2.7/dist-packages/pandas/hashtable.so in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12359)() KeyError: 'log.light'
In [51]:
rlm_mod = sm.RLM(y, X, sm.robust.norms.TrimmedMean(.5)).fit()
abline_plot(model_results=rlm_mod, ax=ax, color='red')
Out[51]:
<matplotlib.figure.Figure at 0x7f89b73fe9d0>
- Why? Because M-estimators are not robust to leverage points.
In [52]:
infl = ols_model.get_influence()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-340-5ff696623192> in <module>() ----> 1 infl = ols_model.get_influence() NameError: name 'ols_model' is not defined
In [53]:
h_bar = 2*(ols_model.df_model + 1 )/ols_model.nobs
hat_diag = infl.summary_frame()['hat_diag']
hat_diag.ix[hat_diag > h_bar]
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-341-0fb883e54663> in <module>() ----> 1 h_bar = 2*(ols_model.df_model + 1 )/ols_model.nobs 2 hat_diag = infl.summary_frame()['hat_diag'] 3 hat_diag.ix[hat_diag > h_bar] NameError: name 'ols_model' is not defined
In [54]:
sidak2 = ols_model.outlier_test('sidak')
sidak2.sort('unadj_p', inplace=True)
print(sidak2)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-342-a6c575f5e012> in <module>() ----> 1 sidak2 = ols_model.outlier_test('sidak') 2 sidak2.sort('unadj_p', inplace=True) 3 print(sidak2) NameError: name 'ols_model' is not defined
In [55]:
fdr2 = ols_model.outlier_test('fdr_bh')
fdr2.sort('unadj_p', inplace=True)
print(fdr2)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-343-cf26bbbd14f6> in <module>() ----> 1 fdr2 = ols_model.outlier_test('fdr_bh') 2 fdr2.sort('unadj_p', inplace=True) 3 print(fdr2) NameError: name 'ols_model' is not defined
- Let's delete that line
In [56]:
del ax.lines[-1]
In [57]:
weights = np.ones(len(X))
weights[X[X['log.Te'] < 3.8].index.values - 1] = 0
wls_model = sm.WLS(y, X, weights=weights).fit()
abline_plot(model_results=wls_model, ax=ax, color='green')
-c:2: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) <ipython-input-345-e3ec53a40864> in <module>() 1 weights = np.ones(len(X)) ----> 2 weights[X[X['log.Te'] < 3.8].index.values - 1] = 0 3 wls_model = sm.WLS(y, X, weights=weights).fit() 4 abline_plot(model_results=wls_model, ax=ax, color='green') IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
- MM estimators are good for this type of problem, unfortunately, we don't yet have these yet.
- It's being worked on, but it gives a good excuse to look at the R cell magics in the notebook.
In [58]:
yy = y.values[:,None]
xx = X['log.Te'].values[:,None]
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-346-a5672cb240e0> in <module>() ----> 1 yy = y.values[:,None] 2 xx = X['log.Te'].values[:,None] AttributeError: 'numpy.ndarray' object has no attribute 'values'
In [59]:
%load_ext rmagic
%R library(robustbase)
%Rpush yy xx
%R mod <- lmrob(yy ~ xx);
%R params <- mod$coefficients;
%Rpull params
--------------------------------------------------------------------------- ImportError Traceback (most recent call last) <ipython-input-347-53d6967d0401> in <module>() ----> 1 get_ipython().magic(u'load_ext rmagic') 2 3 get_ipython().magic(u'R library(robustbase)') 4 get_ipython().magic(u'Rpush yy xx') 5 get_ipython().magic(u'R mod <- lmrob(yy ~ xx);') /usr/lib/python2.7/dist-packages/IPython/core/interactiveshell.pyc in magic(self, arg_s) 2203 magic_name, _, magic_arg_s = arg_s.partition(' ') 2204 magic_name = magic_name.lstrip(prefilter.ESC_MAGIC) -> 2205 return self.run_line_magic(magic_name, magic_arg_s) 2206 2207 #------------------------------------------------------------------------- /usr/lib/python2.7/dist-packages/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line) 2124 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals 2125 with self.builtin_trap: -> 2126 result = fn(*args,**kwargs) 2127 return result 2128 <decorator-gen-64> in load_ext(self, module_str) /usr/lib/python2.7/dist-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k) 191 # but it's overkill for just that one bit of state. 192 def magic_deco(arg): --> 193 call = lambda f, *a, **k: f(*a, **k) 194 195 if callable(arg): /usr/lib/python2.7/dist-packages/IPython/core/magics/extension.pyc in load_ext(self, module_str) 61 if not module_str: 62 raise UsageError('Missing module name.') ---> 63 res = self.shell.extension_manager.load_extension(module_str) 64 65 if res == 'already loaded': /usr/lib/python2.7/dist-packages/IPython/core/extensions.pyc in load_extension(self, module_str) 96 if module_str not in sys.modules: 97 with prepended_to_syspath(self.ipython_extension_dir): ---> 98 __import__(module_str) 99 mod = sys.modules[module_str] 100 if self._call_load_ipython_extension(mod): /usr/lib/python2.7/dist-packages/IPython/extensions/rmagic.py in <module>() 54 import numpy as np 55 ---> 56 import rpy2.rinterface as ri 57 import rpy2.robjects as ro 58 try: ImportError: No module named rpy2.rinterface
In [60]:
%R print(mod)
ERROR: Line magic function `%R` not found.
In [61]:
print(params)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-349-73ac4b936803> in <module>() ----> 1 print(params) NameError: name 'params' is not defined
In [62]:
abline_plot(intercept=params[0], slope=params[1], ax=ax, color='green')
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-350-e1a9f35b3320> in <module>() ----> 1 abline_plot(intercept=params[0], slope=params[1], ax=ax, color='green') NameError: name 'params' is not defined
Exercise: Breakdown points of M-estimator¶
In [63]:
np.random.seed(12345)
nobs = 200
beta_true = np.array([3, 1, 2.5, 3, -4])
X = np.random.uniform(-20,20, size=(nobs, len(beta_true)-1))
# stack a constant in front
X = sm.add_constant(X, prepend=True) # np.c_[np.ones(nobs), X]
mc_iter = 500
contaminate = .25 # percentage of response variables to contaminate
In [64]:
all_betas = []
for i in range(mc_iter):
y = np.dot(X, beta_true) + np.random.normal(size=200)
random_idx = np.random.randint(0, nobs, size=int(contaminate * nobs))
y[random_idx] = np.random.uniform(-750, 750)
beta_hat = sm.RLM(y, X).fit().params
all_betas.append(beta_hat)
In [65]:
all_betas = np.asarray(all_betas)
se_loss = lambda x : np.linalg.norm(x, ord=2)**2
se_beta = lmap(se_loss, all_betas - beta_true)
Squared error loss¶
In [66]:
np.array(se_beta).mean()
Out[66]:
0.44502948730686182
In [67]:
all_betas.mean(0)
Out[67]:
array([ 2.9971, 0.999 , 2.4991, 2.9971, -3.9963])
In [68]:
beta_true
Out[68]:
array([ 3. , 1. , 2.5, 3. , -4. ])
In [69]:
se_loss(all_betas.mean(0) - beta_true)
Out[69]:
3.2360913286754188e-05