{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sales prediction\n",
    "\n",
    "Version with data splitting."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import altair as alt\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.metrics import r2_score\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.metrics import mean_absolute_error"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Import data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('https://raw.githubusercontent.com/kirenz/datasets/master/advertising.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Data structure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Market</th>\n",
       "      <th>TV</th>\n",
       "      <th>radio</th>\n",
       "      <th>newspaper</th>\n",
       "      <th>sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>230.1</td>\n",
       "      <td>37.8</td>\n",
       "      <td>69.2</td>\n",
       "      <td>22.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>44.5</td>\n",
       "      <td>39.3</td>\n",
       "      <td>45.1</td>\n",
       "      <td>10.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>17.2</td>\n",
       "      <td>45.9</td>\n",
       "      <td>69.3</td>\n",
       "      <td>9.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>151.5</td>\n",
       "      <td>41.3</td>\n",
       "      <td>58.5</td>\n",
       "      <td>18.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>180.8</td>\n",
       "      <td>10.8</td>\n",
       "      <td>58.4</td>\n",
       "      <td>12.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>195</th>\n",
       "      <td>196</td>\n",
       "      <td>38.2</td>\n",
       "      <td>3.7</td>\n",
       "      <td>13.8</td>\n",
       "      <td>7.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>196</th>\n",
       "      <td>197</td>\n",
       "      <td>94.2</td>\n",
       "      <td>4.9</td>\n",
       "      <td>8.1</td>\n",
       "      <td>9.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>197</th>\n",
       "      <td>198</td>\n",
       "      <td>177.0</td>\n",
       "      <td>9.3</td>\n",
       "      <td>6.4</td>\n",
       "      <td>12.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>198</th>\n",
       "      <td>199</td>\n",
       "      <td>283.6</td>\n",
       "      <td>42.0</td>\n",
       "      <td>66.2</td>\n",
       "      <td>25.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199</th>\n",
       "      <td>200</td>\n",
       "      <td>232.1</td>\n",
       "      <td>8.6</td>\n",
       "      <td>8.7</td>\n",
       "      <td>13.4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>200 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Market     TV  radio  newspaper  sales\n",
       "0         1  230.1   37.8       69.2   22.1\n",
       "1         2   44.5   39.3       45.1   10.4\n",
       "2         3   17.2   45.9       69.3    9.3\n",
       "3         4  151.5   41.3       58.5   18.5\n",
       "4         5  180.8   10.8       58.4   12.9\n",
       "..      ...    ...    ...        ...    ...\n",
       "195     196   38.2    3.7       13.8    7.6\n",
       "196     197   94.2    4.9        8.1    9.7\n",
       "197     198  177.0    9.3        6.4   12.8\n",
       "198     199  283.6   42.0       66.2   25.5\n",
       "199     200  232.1    8.6        8.7   13.4\n",
       "\n",
       "[200 rows x 5 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 200 entries, 0 to 199\n",
      "Data columns (total 5 columns):\n",
      " #   Column     Non-Null Count  Dtype  \n",
      "---  ------     --------------  -----  \n",
      " 0   Market     200 non-null    int64  \n",
      " 1   TV         200 non-null    float64\n",
      " 2   radio      200 non-null    float64\n",
      " 3   newspaper  200 non-null    float64\n",
      " 4   sales      200 non-null    float64\n",
      "dtypes: float64(4), int64(1)\n",
      "memory usage: 7.9 KB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Data corrections"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# variable Market is categorical\n",
    "df['Market'] = df['Market'].astype('category')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Variable lists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# define outcome variable as y_label\n",
    "y_label = 'sales'\n",
    "\n",
    "# select features\n",
    "features = df.drop(columns=[y_label, 'Market']).columns\n",
    "\n",
    "# create feature data\n",
    "X = df[features]\n",
    "\n",
    "# create response\n",
    "y = df[y_label]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Data splitting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X, y, \n",
    "                                                    test_size=0.2,\n",
    "                                                    random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# data exploration set\n",
    "df_train = pd.DataFrame(X_train.copy())\n",
    "\n",
    "df_train = df_train.join(pd.DataFrame(y_train))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>TV</th>\n",
       "      <td>160.0</td>\n",
       "      <td>150.019375</td>\n",
       "      <td>84.418857</td>\n",
       "      <td>0.7</td>\n",
       "      <td>77.750</td>\n",
       "      <td>150.65</td>\n",
       "      <td>218.825</td>\n",
       "      <td>296.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>radio</th>\n",
       "      <td>160.0</td>\n",
       "      <td>22.875625</td>\n",
       "      <td>14.805216</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.825</td>\n",
       "      <td>21.20</td>\n",
       "      <td>36.425</td>\n",
       "      <td>49.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>newspaper</th>\n",
       "      <td>160.0</td>\n",
       "      <td>29.945625</td>\n",
       "      <td>20.336449</td>\n",
       "      <td>0.3</td>\n",
       "      <td>12.875</td>\n",
       "      <td>25.60</td>\n",
       "      <td>44.500</td>\n",
       "      <td>100.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sales</th>\n",
       "      <td>160.0</td>\n",
       "      <td>14.100000</td>\n",
       "      <td>5.108754</td>\n",
       "      <td>1.6</td>\n",
       "      <td>10.475</td>\n",
       "      <td>13.20</td>\n",
       "      <td>17.325</td>\n",
       "      <td>27.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           count        mean        std  min     25%     50%      75%    max\n",
       "TV         160.0  150.019375  84.418857  0.7  77.750  150.65  218.825  296.4\n",
       "radio      160.0   22.875625  14.805216  0.0   9.825   21.20   36.425   49.6\n",
       "newspaper  160.0   29.945625  20.336449  0.3  12.875   25.60   44.500  100.9\n",
       "sales      160.0   14.100000   5.108754  1.6  10.475   13.20   17.325   27.0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train.describe().T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<div id=\"altair-viz-6d102b4b7bbb4655aefb05cc82ff6e4c\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-6d102b4b7bbb4655aefb05cc82ff6e4c\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-6d102b4b7bbb4655aefb05cc82ff6e4c\");\n",
       "    }\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm//vega-lite@4.17.0?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"5\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"4.17.0\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"repeat\": {\"column\": [\"sales\", \"TV\", \"radio\", \"newspaper\"]}, \"spec\": {\"data\": {\"name\": \"data-3d599e277b1304f147a07d610894b02d\"}, \"mark\": \"bar\", \"encoding\": {\"x\": {\"bin\": true, \"field\": {\"repeat\": \"column\"}, \"type\": \"quantitative\"}, \"y\": {\"aggregate\": \"count\", \"type\": \"quantitative\"}}, \"height\": 150, \"width\": 150}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-3d599e277b1304f147a07d610894b02d\": [{\"TV\": 116.0, \"radio\": 7.7, \"newspaper\": 23.1, \"sales\": 11.0}, {\"TV\": 177.0, \"radio\": 9.3, \"newspaper\": 6.4, \"sales\": 12.8}, {\"TV\": 43.1, \"radio\": 26.7, \"newspaper\": 35.1, \"sales\": 10.1}, {\"TV\": 62.3, \"radio\": 12.6, \"newspaper\": 18.3, \"sales\": 9.7}, {\"TV\": 224.0, \"radio\": 2.4, \"newspaper\": 15.6, \"sales\": 11.6}, {\"TV\": 38.2, \"radio\": 3.7, \"newspaper\": 13.8, \"sales\": 7.6}, {\"TV\": 70.6, \"radio\": 16.0, \"newspaper\": 40.8, \"sales\": 10.5}, {\"TV\": 147.3, \"radio\": 23.9, \"newspaper\": 19.1, \"sales\": 14.6}, {\"TV\": 104.6, \"radio\": 5.7, \"newspaper\": 34.4, \"sales\": 10.4}, {\"TV\": 76.3, \"radio\": 27.5, \"newspaper\": 16.0, \"sales\": 12.0}, {\"TV\": 78.2, \"radio\": 46.8, \"newspaper\": 34.5, \"sales\": 14.6}, {\"TV\": 168.4, \"radio\": 7.1, \"newspaper\": 12.8, \"sales\": 11.7}, {\"TV\": 8.7, \"radio\": 48.9, \"newspaper\": 75.0, \"sales\": 7.2}, {\"TV\": 7.8, \"radio\": 38.9, \"newspaper\": 50.6, \"sales\": 6.6}, {\"TV\": 76.4, \"radio\": 0.8, \"newspaper\": 14.8, \"sales\": 9.4}, {\"TV\": 129.4, \"radio\": 5.7, \"newspaper\": 31.3, \"sales\": 11.0}, {\"TV\": 73.4, \"radio\": 17.0, \"newspaper\": 12.9, \"sales\": 10.9}, {\"TV\": 289.7, \"radio\": 42.3, \"newspaper\": 51.2, \"sales\": 25.4}, {\"TV\": 19.6, \"radio\": 20.1, \"newspaper\": 17.0, \"sales\": 7.6}, {\"TV\": 197.6, \"radio\": 3.5, \"newspaper\": 5.9, \"sales\": 
11.7}, {\"TV\": 284.3, \"radio\": 10.6, \"newspaper\": 6.4, \"sales\": 15.0}, {\"TV\": 184.9, \"radio\": 21.0, \"newspaper\": 22.0, \"sales\": 15.5}, {\"TV\": 112.9, \"radio\": 17.4, \"newspaper\": 38.6, \"sales\": 11.9}, {\"TV\": 23.8, \"radio\": 35.1, \"newspaper\": 65.9, \"sales\": 9.2}, {\"TV\": 290.7, \"radio\": 4.1, \"newspaper\": 8.5, \"sales\": 12.8}, {\"TV\": 19.4, \"radio\": 16.0, \"newspaper\": 22.3, \"sales\": 6.6}, {\"TV\": 293.6, \"radio\": 27.7, \"newspaper\": 1.8, \"sales\": 20.7}, {\"TV\": 18.7, \"radio\": 12.1, \"newspaper\": 23.4, \"sales\": 6.7}, {\"TV\": 134.3, \"radio\": 4.9, \"newspaper\": 9.3, \"sales\": 11.2}, {\"TV\": 25.6, \"radio\": 39.0, \"newspaper\": 9.3, \"sales\": 9.5}, {\"TV\": 100.4, \"radio\": 9.6, \"newspaper\": 3.6, \"sales\": 10.7}, {\"TV\": 80.2, \"radio\": 0.0, \"newspaper\": 9.2, \"sales\": 8.8}, {\"TV\": 188.4, \"radio\": 18.1, \"newspaper\": 25.6, \"sales\": 14.9}, {\"TV\": 177.0, \"radio\": 33.4, \"newspaper\": 38.7, \"sales\": 17.1}, {\"TV\": 125.7, \"radio\": 36.9, \"newspaper\": 79.2, \"sales\": 15.9}, {\"TV\": 209.6, \"radio\": 20.6, \"newspaper\": 10.7, \"sales\": 15.9}, {\"TV\": 142.9, \"radio\": 29.3, \"newspaper\": 12.6, \"sales\": 15.0}, {\"TV\": 184.9, \"radio\": 43.9, \"newspaper\": 1.7, \"sales\": 20.7}, {\"TV\": 222.4, \"radio\": 4.3, \"newspaper\": 49.8, \"sales\": 11.7}, {\"TV\": 241.7, \"radio\": 38.0, \"newspaper\": 23.2, \"sales\": 21.8}, {\"TV\": 17.2, \"radio\": 45.9, \"newspaper\": 69.3, \"sales\": 9.3}, {\"TV\": 120.5, \"radio\": 28.5, \"newspaper\": 14.2, \"sales\": 14.2}, {\"TV\": 89.7, \"radio\": 9.9, \"newspaper\": 35.7, \"sales\": 10.6}, {\"TV\": 191.1, \"radio\": 28.7, \"newspaper\": 18.2, \"sales\": 17.3}, {\"TV\": 75.5, \"radio\": 10.8, \"newspaper\": 6.0, \"sales\": 9.9}, {\"TV\": 193.2, \"radio\": 18.4, \"newspaper\": 65.7, \"sales\": 15.2}, {\"TV\": 85.7, \"radio\": 35.8, \"newspaper\": 49.3, \"sales\": 13.3}, {\"TV\": 266.9, \"radio\": 43.8, \"newspaper\": 5.0, \"sales\": 25.4}, {\"TV\": 
39.5, \"radio\": 41.1, \"newspaper\": 5.8, \"sales\": 10.8}, {\"TV\": 261.3, \"radio\": 42.7, \"newspaper\": 54.7, \"sales\": 24.2}, {\"TV\": 13.2, \"radio\": 15.9, \"newspaper\": 49.6, \"sales\": 5.6}, {\"TV\": 193.7, \"radio\": 35.4, \"newspaper\": 75.6, \"sales\": 19.2}, {\"TV\": 296.4, \"radio\": 36.3, \"newspaper\": 100.9, \"sales\": 23.8}, {\"TV\": 265.6, \"radio\": 20.0, \"newspaper\": 0.3, \"sales\": 17.4}, {\"TV\": 214.7, \"radio\": 24.0, \"newspaper\": 4.0, \"sales\": 17.4}, {\"TV\": 149.7, \"radio\": 35.6, \"newspaper\": 6.0, \"sales\": 17.3}, {\"TV\": 131.7, \"radio\": 18.4, \"newspaper\": 34.6, \"sales\": 12.9}, {\"TV\": 57.5, \"radio\": 32.8, \"newspaper\": 23.5, \"sales\": 11.8}, {\"TV\": 240.1, \"radio\": 16.7, \"newspaper\": 22.9, \"sales\": 15.9}, {\"TV\": 141.3, \"radio\": 26.8, \"newspaper\": 46.2, \"sales\": 15.5}, {\"TV\": 180.8, \"radio\": 10.8, \"newspaper\": 58.4, \"sales\": 12.9}, {\"TV\": 97.2, \"radio\": 1.5, \"newspaper\": 30.0, \"sales\": 9.6}, {\"TV\": 220.5, \"radio\": 33.2, \"newspaper\": 37.9, \"sales\": 20.1}, {\"TV\": 140.3, \"radio\": 1.9, \"newspaper\": 9.0, \"sales\": 10.3}, {\"TV\": 255.4, \"radio\": 26.9, \"newspaper\": 5.5, \"sales\": 19.8}, {\"TV\": 96.2, \"radio\": 14.8, \"newspaper\": 38.9, \"sales\": 11.4}, {\"TV\": 66.1, \"radio\": 5.8, \"newspaper\": 24.2, \"sales\": 8.6}, {\"TV\": 239.3, \"radio\": 15.5, \"newspaper\": 27.3, \"sales\": 15.7}, {\"TV\": 175.7, \"radio\": 15.4, \"newspaper\": 2.4, \"sales\": 14.1}, {\"TV\": 240.1, \"radio\": 7.3, \"newspaper\": 8.7, \"sales\": 13.2}, {\"TV\": 17.9, \"radio\": 37.6, \"newspaper\": 21.6, \"sales\": 8.0}, {\"TV\": 230.1, \"radio\": 37.8, \"newspaper\": 69.2, \"sales\": 22.1}, {\"TV\": 283.6, \"radio\": 42.0, \"newspaper\": 66.2, \"sales\": 25.5}, {\"TV\": 171.3, \"radio\": 39.7, \"newspaper\": 37.7, \"sales\": 19.0}, {\"TV\": 199.1, \"radio\": 30.6, \"newspaper\": 38.7, \"sales\": 18.3}, {\"TV\": 123.1, \"radio\": 34.6, \"newspaper\": 12.4, \"sales\": 15.2}, {\"TV\": 
131.1, \"radio\": 42.8, \"newspaper\": 28.9, \"sales\": 18.0}, {\"TV\": 25.1, \"radio\": 25.7, \"newspaper\": 43.3, \"sales\": 8.5}, {\"TV\": 163.5, \"radio\": 36.8, \"newspaper\": 7.4, \"sales\": 18.0}, {\"TV\": 248.8, \"radio\": 27.1, \"newspaper\": 22.9, \"sales\": 18.9}, {\"TV\": 202.5, \"radio\": 22.3, \"newspaper\": 31.6, \"sales\": 16.6}, {\"TV\": 13.1, \"radio\": 0.4, \"newspaper\": 25.6, \"sales\": 5.3}, {\"TV\": 4.1, \"radio\": 11.6, \"newspaper\": 5.7, \"sales\": 3.2}, {\"TV\": 93.9, \"radio\": 43.5, \"newspaper\": 50.5, \"sales\": 15.3}, {\"TV\": 262.9, \"radio\": 3.5, \"newspaper\": 19.5, \"sales\": 12.0}, {\"TV\": 228.3, \"radio\": 16.9, \"newspaper\": 26.2, \"sales\": 15.5}, {\"TV\": 253.8, \"radio\": 21.3, \"newspaper\": 30.0, \"sales\": 17.6}, {\"TV\": 243.2, \"radio\": 49.0, \"newspaper\": 44.3, \"sales\": 25.4}, {\"TV\": 239.8, \"radio\": 4.1, \"newspaper\": 36.9, \"sales\": 12.3}, {\"TV\": 228.0, \"radio\": 37.7, \"newspaper\": 32.0, \"sales\": 21.5}, {\"TV\": 215.4, \"radio\": 23.6, \"newspaper\": 57.6, \"sales\": 17.1}, {\"TV\": 239.9, \"radio\": 41.5, \"newspaper\": 18.5, \"sales\": 23.2}, {\"TV\": 107.4, \"radio\": 14.0, \"newspaper\": 10.9, \"sales\": 11.5}, {\"TV\": 187.8, \"radio\": 21.1, \"newspaper\": 9.5, \"sales\": 15.6}, {\"TV\": 206.9, \"radio\": 8.4, \"newspaper\": 26.4, \"sales\": 12.9}, {\"TV\": 43.0, \"radio\": 25.9, \"newspaper\": 20.5, \"sales\": 9.6}, {\"TV\": 151.5, \"radio\": 41.3, \"newspaper\": 58.5, \"sales\": 18.5}, {\"TV\": 137.9, \"radio\": 46.4, \"newspaper\": 59.0, \"sales\": 19.2}, {\"TV\": 182.6, \"radio\": 46.2, \"newspaper\": 58.7, \"sales\": 21.2}, {\"TV\": 219.8, \"radio\": 33.5, \"newspaper\": 45.1, \"sales\": 19.6}, {\"TV\": 156.6, \"radio\": 2.6, \"newspaper\": 8.3, \"sales\": 10.5}, {\"TV\": 276.7, \"radio\": 2.3, \"newspaper\": 23.7, \"sales\": 11.8}, {\"TV\": 205.0, \"radio\": 45.1, \"newspaper\": 19.6, \"sales\": 22.6}, {\"TV\": 66.9, \"radio\": 11.7, \"newspaper\": 36.8, \"sales\": 9.7}, {\"TV\": 76.4, 
\"radio\": 26.7, \"newspaper\": 22.3, \"sales\": 11.8}, {\"TV\": 95.7, \"radio\": 1.4, \"newspaper\": 7.4, \"sales\": 9.5}, {\"TV\": 120.2, \"radio\": 19.6, \"newspaper\": 11.6, \"sales\": 13.2}, {\"TV\": 225.8, \"radio\": 8.2, \"newspaper\": 56.5, \"sales\": 13.4}, {\"TV\": 28.6, \"radio\": 1.5, \"newspaper\": 33.0, \"sales\": 7.3}, {\"TV\": 68.4, \"radio\": 44.5, \"newspaper\": 35.6, \"sales\": 13.6}, {\"TV\": 248.4, \"radio\": 30.2, \"newspaper\": 20.3, \"sales\": 20.2}, {\"TV\": 218.5, \"radio\": 5.4, \"newspaper\": 27.4, \"sales\": 12.2}, {\"TV\": 109.8, \"radio\": 47.8, \"newspaper\": 51.4, \"sales\": 16.7}, {\"TV\": 8.6, \"radio\": 2.1, \"newspaper\": 1.0, \"sales\": 4.8}, {\"TV\": 97.5, \"radio\": 7.6, \"newspaper\": 7.2, \"sales\": 9.7}, {\"TV\": 210.7, \"radio\": 29.5, \"newspaper\": 9.3, \"sales\": 18.4}, {\"TV\": 164.5, \"radio\": 20.9, \"newspaper\": 47.4, \"sales\": 14.5}, {\"TV\": 265.2, \"radio\": 2.9, \"newspaper\": 43.0, \"sales\": 12.7}, {\"TV\": 281.4, \"radio\": 39.6, \"newspaper\": 55.8, \"sales\": 24.4}, {\"TV\": 26.8, \"radio\": 33.0, \"newspaper\": 19.3, \"sales\": 8.8}, {\"TV\": 276.9, \"radio\": 48.9, \"newspaper\": 41.8, \"sales\": 27.0}, {\"TV\": 36.9, \"radio\": 38.6, \"newspaper\": 65.6, \"sales\": 10.8}, {\"TV\": 206.8, \"radio\": 5.2, \"newspaper\": 19.4, \"sales\": 12.2}, {\"TV\": 287.6, \"radio\": 43.0, \"newspaper\": 71.8, \"sales\": 26.2}, {\"TV\": 102.7, \"radio\": 29.6, \"newspaper\": 8.4, \"sales\": 14.0}, {\"TV\": 262.7, \"radio\": 28.8, \"newspaper\": 15.9, \"sales\": 20.2}, {\"TV\": 90.4, \"radio\": 0.3, \"newspaper\": 23.2, \"sales\": 8.7}, {\"TV\": 199.8, \"radio\": 3.1, \"newspaper\": 34.6, \"sales\": 11.4}, {\"TV\": 94.2, \"radio\": 4.9, \"newspaper\": 8.1, \"sales\": 9.7}, {\"TV\": 210.8, \"radio\": 49.6, \"newspaper\": 37.7, \"sales\": 23.8}, {\"TV\": 227.2, \"radio\": 15.8, \"newspaper\": 49.9, \"sales\": 14.8}, {\"TV\": 88.3, \"radio\": 25.5, \"newspaper\": 73.4, \"sales\": 12.9}, {\"TV\": 237.4, \"radio\": 5.1, 
\"newspaper\": 23.5, \"sales\": 12.5}, {\"TV\": 136.2, \"radio\": 19.2, \"newspaper\": 16.6, \"sales\": 13.2}, {\"TV\": 172.5, \"radio\": 18.1, \"newspaper\": 30.7, \"sales\": 14.4}, {\"TV\": 17.2, \"radio\": 4.1, \"newspaper\": 31.6, \"sales\": 5.9}, {\"TV\": 59.6, \"radio\": 12.0, \"newspaper\": 43.1, \"sales\": 9.7}, {\"TV\": 74.7, \"radio\": 49.4, \"newspaper\": 45.7, \"sales\": 14.7}, {\"TV\": 149.8, \"radio\": 1.3, \"newspaper\": 24.3, \"sales\": 10.1}, {\"TV\": 166.8, \"radio\": 42.0, \"newspaper\": 3.6, \"sales\": 19.6}, {\"TV\": 44.5, \"radio\": 39.3, \"newspaper\": 45.1, \"sales\": 10.4}, {\"TV\": 216.4, \"radio\": 41.7, \"newspaper\": 39.6, \"sales\": 22.6}, {\"TV\": 44.7, \"radio\": 25.8, \"newspaper\": 20.6, \"sales\": 10.1}, {\"TV\": 0.7, \"radio\": 39.6, \"newspaper\": 8.7, \"sales\": 1.6}, {\"TV\": 121.0, \"radio\": 8.4, \"newspaper\": 48.7, \"sales\": 11.6}, {\"TV\": 187.9, \"radio\": 17.2, \"newspaper\": 17.9, \"sales\": 14.7}, {\"TV\": 135.2, \"radio\": 41.7, \"newspaper\": 45.9, \"sales\": 17.2}, {\"TV\": 139.2, \"radio\": 14.3, \"newspaper\": 25.6, \"sales\": 12.2}, {\"TV\": 110.7, \"radio\": 40.6, \"newspaper\": 63.2, \"sales\": 16.0}, {\"TV\": 213.4, \"radio\": 24.6, \"newspaper\": 13.1, \"sales\": 17.0}, {\"TV\": 18.8, \"radio\": 21.7, \"newspaper\": 50.4, \"sales\": 7.0}, {\"TV\": 232.1, \"radio\": 8.6, \"newspaper\": 8.7, \"sales\": 13.4}, {\"TV\": 218.4, \"radio\": 27.7, \"newspaper\": 53.4, \"sales\": 18.0}, {\"TV\": 286.0, \"radio\": 13.9, \"newspaper\": 3.7, \"sales\": 15.9}, {\"TV\": 109.8, \"radio\": 14.3, \"newspaper\": 31.7, \"sales\": 12.4}, {\"TV\": 25.0, \"radio\": 11.0, \"newspaper\": 29.7, \"sales\": 7.2}, {\"TV\": 204.1, \"radio\": 32.9, \"newspaper\": 46.0, \"sales\": 19.0}, {\"TV\": 217.7, \"radio\": 33.5, \"newspaper\": 59.0, \"sales\": 19.4}, {\"TV\": 165.6, \"radio\": 10.0, \"newspaper\": 17.6, \"sales\": 12.6}, {\"TV\": 280.2, \"radio\": 10.1, \"newspaper\": 21.4, \"sales\": 14.8}]}}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.RepeatChart(...)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "alt.Chart(df_train).mark_bar().encode(\n",
    "    alt.X(alt.repeat(\"column\"), type=\"quantitative\", bin=True),\n",
    "    y='count()',\n",
    ").properties(\n",
    "    width=150,\n",
    "    height=150\n",
    ").repeat(\n",
    "    column=['sales', 'TV', 'radio', 'newspaper']\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<div id=\"altair-viz-eb57be22238c47cf8a5f06515f2b2146\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-eb57be22238c47cf8a5f06515f2b2146\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-eb57be22238c47cf8a5f06515f2b2146\");\n",
       "    }\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm//vega-lite@4.17.0?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"5\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"4.17.0\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"repeat\": {\"column\": [\"sales\", \"TV\", \"radio\", \"newspaper\"], \"row\": [\"sales\", \"TV\", \"radio\", \"newspaper\"]}, \"spec\": {\"data\": {\"name\": \"data-3d599e277b1304f147a07d610894b02d\"}, \"mark\": \"circle\", \"encoding\": {\"x\": {\"field\": {\"repeat\": \"column\"}, \"type\": \"quantitative\"}, \"y\": {\"field\": {\"repeat\": \"row\"}, \"type\": \"quantitative\"}}, \"height\": 150, \"selection\": {\"selector001\": {\"type\": \"interval\", \"bind\": \"scales\", \"encodings\": [\"x\", \"y\"]}}, \"width\": 150}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-3d599e277b1304f147a07d610894b02d\": [{\"TV\": 116.0, \"radio\": 7.7, \"newspaper\": 23.1, \"sales\": 11.0}, {\"TV\": 177.0, \"radio\": 9.3, \"newspaper\": 6.4, \"sales\": 12.8}, {\"TV\": 43.1, \"radio\": 26.7, \"newspaper\": 35.1, \"sales\": 10.1}, {\"TV\": 62.3, \"radio\": 12.6, \"newspaper\": 18.3, \"sales\": 9.7}, {\"TV\": 224.0, \"radio\": 2.4, \"newspaper\": 15.6, \"sales\": 11.6}, {\"TV\": 38.2, \"radio\": 3.7, \"newspaper\": 13.8, \"sales\": 7.6}, {\"TV\": 70.6, \"radio\": 16.0, \"newspaper\": 40.8, \"sales\": 10.5}, {\"TV\": 147.3, \"radio\": 23.9, \"newspaper\": 19.1, \"sales\": 14.6}, {\"TV\": 104.6, \"radio\": 5.7, \"newspaper\": 34.4, \"sales\": 10.4}, {\"TV\": 76.3, \"radio\": 27.5, \"newspaper\": 16.0, \"sales\": 12.0}, {\"TV\": 78.2, \"radio\": 46.8, \"newspaper\": 34.5, \"sales\": 14.6}, {\"TV\": 168.4, \"radio\": 7.1, \"newspaper\": 12.8, \"sales\": 11.7}, {\"TV\": 8.7, \"radio\": 48.9, \"newspaper\": 75.0, \"sales\": 7.2}, {\"TV\": 7.8, \"radio\": 38.9, \"newspaper\": 50.6, \"sales\": 6.6}, {\"TV\": 76.4, \"radio\": 0.8, \"newspaper\": 14.8, \"sales\": 9.4}, {\"TV\": 129.4, \"radio\": 5.7, \"newspaper\": 31.3, \"sales\": 11.0}, {\"TV\": 73.4, \"radio\": 17.0, \"newspaper\": 12.9, \"sales\": 10.9}, {\"TV\": 289.7, \"radio\": 42.3, 
\"newspaper\": 51.2, \"sales\": 25.4}, {\"TV\": 19.6, \"radio\": 20.1, \"newspaper\": 17.0, \"sales\": 7.6}, {\"TV\": 197.6, \"radio\": 3.5, \"newspaper\": 5.9, \"sales\": 11.7}, {\"TV\": 284.3, \"radio\": 10.6, \"newspaper\": 6.4, \"sales\": 15.0}, {\"TV\": 184.9, \"radio\": 21.0, \"newspaper\": 22.0, \"sales\": 15.5}, {\"TV\": 112.9, \"radio\": 17.4, \"newspaper\": 38.6, \"sales\": 11.9}, {\"TV\": 23.8, \"radio\": 35.1, \"newspaper\": 65.9, \"sales\": 9.2}, {\"TV\": 290.7, \"radio\": 4.1, \"newspaper\": 8.5, \"sales\": 12.8}, {\"TV\": 19.4, \"radio\": 16.0, \"newspaper\": 22.3, \"sales\": 6.6}, {\"TV\": 293.6, \"radio\": 27.7, \"newspaper\": 1.8, \"sales\": 20.7}, {\"TV\": 18.7, \"radio\": 12.1, \"newspaper\": 23.4, \"sales\": 6.7}, {\"TV\": 134.3, \"radio\": 4.9, \"newspaper\": 9.3, \"sales\": 11.2}, {\"TV\": 25.6, \"radio\": 39.0, \"newspaper\": 9.3, \"sales\": 9.5}, {\"TV\": 100.4, \"radio\": 9.6, \"newspaper\": 3.6, \"sales\": 10.7}, {\"TV\": 80.2, \"radio\": 0.0, \"newspaper\": 9.2, \"sales\": 8.8}, {\"TV\": 188.4, \"radio\": 18.1, \"newspaper\": 25.6, \"sales\": 14.9}, {\"TV\": 177.0, \"radio\": 33.4, \"newspaper\": 38.7, \"sales\": 17.1}, {\"TV\": 125.7, \"radio\": 36.9, \"newspaper\": 79.2, \"sales\": 15.9}, {\"TV\": 209.6, \"radio\": 20.6, \"newspaper\": 10.7, \"sales\": 15.9}, {\"TV\": 142.9, \"radio\": 29.3, \"newspaper\": 12.6, \"sales\": 15.0}, {\"TV\": 184.9, \"radio\": 43.9, \"newspaper\": 1.7, \"sales\": 20.7}, {\"TV\": 222.4, \"radio\": 4.3, \"newspaper\": 49.8, \"sales\": 11.7}, {\"TV\": 241.7, \"radio\": 38.0, \"newspaper\": 23.2, \"sales\": 21.8}, {\"TV\": 17.2, \"radio\": 45.9, \"newspaper\": 69.3, \"sales\": 9.3}, {\"TV\": 120.5, \"radio\": 28.5, \"newspaper\": 14.2, \"sales\": 14.2}, {\"TV\": 89.7, \"radio\": 9.9, \"newspaper\": 35.7, \"sales\": 10.6}, {\"TV\": 191.1, \"radio\": 28.7, \"newspaper\": 18.2, \"sales\": 17.3}, {\"TV\": 75.5, \"radio\": 10.8, \"newspaper\": 6.0, \"sales\": 9.9}, {\"TV\": 193.2, \"radio\": 18.4, \"newspaper\": 
65.7, \"sales\": 15.2}, {\"TV\": 85.7, \"radio\": 35.8, \"newspaper\": 49.3, \"sales\": 13.3}, {\"TV\": 266.9, \"radio\": 43.8, \"newspaper\": 5.0, \"sales\": 25.4}, {\"TV\": 39.5, \"radio\": 41.1, \"newspaper\": 5.8, \"sales\": 10.8}, {\"TV\": 261.3, \"radio\": 42.7, \"newspaper\": 54.7, \"sales\": 24.2}, {\"TV\": 13.2, \"radio\": 15.9, \"newspaper\": 49.6, \"sales\": 5.6}, {\"TV\": 193.7, \"radio\": 35.4, \"newspaper\": 75.6, \"sales\": 19.2}, {\"TV\": 296.4, \"radio\": 36.3, \"newspaper\": 100.9, \"sales\": 23.8}, {\"TV\": 265.6, \"radio\": 20.0, \"newspaper\": 0.3, \"sales\": 17.4}, {\"TV\": 214.7, \"radio\": 24.0, \"newspaper\": 4.0, \"sales\": 17.4}, {\"TV\": 149.7, \"radio\": 35.6, \"newspaper\": 6.0, \"sales\": 17.3}, {\"TV\": 131.7, \"radio\": 18.4, \"newspaper\": 34.6, \"sales\": 12.9}, {\"TV\": 57.5, \"radio\": 32.8, \"newspaper\": 23.5, \"sales\": 11.8}, {\"TV\": 240.1, \"radio\": 16.7, \"newspaper\": 22.9, \"sales\": 15.9}, {\"TV\": 141.3, \"radio\": 26.8, \"newspaper\": 46.2, \"sales\": 15.5}, {\"TV\": 180.8, \"radio\": 10.8, \"newspaper\": 58.4, \"sales\": 12.9}, {\"TV\": 97.2, \"radio\": 1.5, \"newspaper\": 30.0, \"sales\": 9.6}, {\"TV\": 220.5, \"radio\": 33.2, \"newspaper\": 37.9, \"sales\": 20.1}, {\"TV\": 140.3, \"radio\": 1.9, \"newspaper\": 9.0, \"sales\": 10.3}, {\"TV\": 255.4, \"radio\": 26.9, \"newspaper\": 5.5, \"sales\": 19.8}, {\"TV\": 96.2, \"radio\": 14.8, \"newspaper\": 38.9, \"sales\": 11.4}, {\"TV\": 66.1, \"radio\": 5.8, \"newspaper\": 24.2, \"sales\": 8.6}, {\"TV\": 239.3, \"radio\": 15.5, \"newspaper\": 27.3, \"sales\": 15.7}, {\"TV\": 175.7, \"radio\": 15.4, \"newspaper\": 2.4, \"sales\": 14.1}, {\"TV\": 240.1, \"radio\": 7.3, \"newspaper\": 8.7, \"sales\": 13.2}, {\"TV\": 17.9, \"radio\": 37.6, \"newspaper\": 21.6, \"sales\": 8.0}, {\"TV\": 230.1, \"radio\": 37.8, \"newspaper\": 69.2, \"sales\": 22.1}, {\"TV\": 283.6, \"radio\": 42.0, \"newspaper\": 66.2, \"sales\": 25.5}, {\"TV\": 171.3, \"radio\": 39.7, \"newspaper\": 37.7, 
\"sales\": 19.0}, {\"TV\": 199.1, \"radio\": 30.6, \"newspaper\": 38.7, \"sales\": 18.3}, {\"TV\": 123.1, \"radio\": 34.6, \"newspaper\": 12.4, \"sales\": 15.2}, {\"TV\": 131.1, \"radio\": 42.8, \"newspaper\": 28.9, \"sales\": 18.0}, {\"TV\": 25.1, \"radio\": 25.7, \"newspaper\": 43.3, \"sales\": 8.5}, {\"TV\": 163.5, \"radio\": 36.8, \"newspaper\": 7.4, \"sales\": 18.0}, {\"TV\": 248.8, \"radio\": 27.1, \"newspaper\": 22.9, \"sales\": 18.9}, {\"TV\": 202.5, \"radio\": 22.3, \"newspaper\": 31.6, \"sales\": 16.6}, {\"TV\": 13.1, \"radio\": 0.4, \"newspaper\": 25.6, \"sales\": 5.3}, {\"TV\": 4.1, \"radio\": 11.6, \"newspaper\": 5.7, \"sales\": 3.2}, {\"TV\": 93.9, \"radio\": 43.5, \"newspaper\": 50.5, \"sales\": 15.3}, {\"TV\": 262.9, \"radio\": 3.5, \"newspaper\": 19.5, \"sales\": 12.0}, {\"TV\": 228.3, \"radio\": 16.9, \"newspaper\": 26.2, \"sales\": 15.5}, {\"TV\": 253.8, \"radio\": 21.3, \"newspaper\": 30.0, \"sales\": 17.6}, {\"TV\": 243.2, \"radio\": 49.0, \"newspaper\": 44.3, \"sales\": 25.4}, {\"TV\": 239.8, \"radio\": 4.1, \"newspaper\": 36.9, \"sales\": 12.3}, {\"TV\": 228.0, \"radio\": 37.7, \"newspaper\": 32.0, \"sales\": 21.5}, {\"TV\": 215.4, \"radio\": 23.6, \"newspaper\": 57.6, \"sales\": 17.1}, {\"TV\": 239.9, \"radio\": 41.5, \"newspaper\": 18.5, \"sales\": 23.2}, {\"TV\": 107.4, \"radio\": 14.0, \"newspaper\": 10.9, \"sales\": 11.5}, {\"TV\": 187.8, \"radio\": 21.1, \"newspaper\": 9.5, \"sales\": 15.6}, {\"TV\": 206.9, \"radio\": 8.4, \"newspaper\": 26.4, \"sales\": 12.9}, {\"TV\": 43.0, \"radio\": 25.9, \"newspaper\": 20.5, \"sales\": 9.6}, {\"TV\": 151.5, \"radio\": 41.3, \"newspaper\": 58.5, \"sales\": 18.5}, {\"TV\": 137.9, \"radio\": 46.4, \"newspaper\": 59.0, \"sales\": 19.2}, {\"TV\": 182.6, \"radio\": 46.2, \"newspaper\": 58.7, \"sales\": 21.2}, {\"TV\": 219.8, \"radio\": 33.5, \"newspaper\": 45.1, \"sales\": 19.6}, {\"TV\": 156.6, \"radio\": 2.6, \"newspaper\": 8.3, \"sales\": 10.5}, {\"TV\": 276.7, \"radio\": 2.3, \"newspaper\": 23.7, 
\"sales\": 11.8}, {\"TV\": 205.0, \"radio\": 45.1, \"newspaper\": 19.6, \"sales\": 22.6}, {\"TV\": 66.9, \"radio\": 11.7, \"newspaper\": 36.8, \"sales\": 9.7}, {\"TV\": 76.4, \"radio\": 26.7, \"newspaper\": 22.3, \"sales\": 11.8}, {\"TV\": 95.7, \"radio\": 1.4, \"newspaper\": 7.4, \"sales\": 9.5}, {\"TV\": 120.2, \"radio\": 19.6, \"newspaper\": 11.6, \"sales\": 13.2}, {\"TV\": 225.8, \"radio\": 8.2, \"newspaper\": 56.5, \"sales\": 13.4}, {\"TV\": 28.6, \"radio\": 1.5, \"newspaper\": 33.0, \"sales\": 7.3}, {\"TV\": 68.4, \"radio\": 44.5, \"newspaper\": 35.6, \"sales\": 13.6}, {\"TV\": 248.4, \"radio\": 30.2, \"newspaper\": 20.3, \"sales\": 20.2}, {\"TV\": 218.5, \"radio\": 5.4, \"newspaper\": 27.4, \"sales\": 12.2}, {\"TV\": 109.8, \"radio\": 47.8, \"newspaper\": 51.4, \"sales\": 16.7}, {\"TV\": 8.6, \"radio\": 2.1, \"newspaper\": 1.0, \"sales\": 4.8}, {\"TV\": 97.5, \"radio\": 7.6, \"newspaper\": 7.2, \"sales\": 9.7}, {\"TV\": 210.7, \"radio\": 29.5, \"newspaper\": 9.3, \"sales\": 18.4}, {\"TV\": 164.5, \"radio\": 20.9, \"newspaper\": 47.4, \"sales\": 14.5}, {\"TV\": 265.2, \"radio\": 2.9, \"newspaper\": 43.0, \"sales\": 12.7}, {\"TV\": 281.4, \"radio\": 39.6, \"newspaper\": 55.8, \"sales\": 24.4}, {\"TV\": 26.8, \"radio\": 33.0, \"newspaper\": 19.3, \"sales\": 8.8}, {\"TV\": 276.9, \"radio\": 48.9, \"newspaper\": 41.8, \"sales\": 27.0}, {\"TV\": 36.9, \"radio\": 38.6, \"newspaper\": 65.6, \"sales\": 10.8}, {\"TV\": 206.8, \"radio\": 5.2, \"newspaper\": 19.4, \"sales\": 12.2}, {\"TV\": 287.6, \"radio\": 43.0, \"newspaper\": 71.8, \"sales\": 26.2}, {\"TV\": 102.7, \"radio\": 29.6, \"newspaper\": 8.4, \"sales\": 14.0}, {\"TV\": 262.7, \"radio\": 28.8, \"newspaper\": 15.9, \"sales\": 20.2}, {\"TV\": 90.4, \"radio\": 0.3, \"newspaper\": 23.2, \"sales\": 8.7}, {\"TV\": 199.8, \"radio\": 3.1, \"newspaper\": 34.6, \"sales\": 11.4}, {\"TV\": 94.2, \"radio\": 4.9, \"newspaper\": 8.1, \"sales\": 9.7}, {\"TV\": 210.8, \"radio\": 49.6, \"newspaper\": 37.7, \"sales\": 23.8}, 
{\"TV\": 227.2, \"radio\": 15.8, \"newspaper\": 49.9, \"sales\": 14.8}, {\"TV\": 88.3, \"radio\": 25.5, \"newspaper\": 73.4, \"sales\": 12.9}, {\"TV\": 237.4, \"radio\": 5.1, \"newspaper\": 23.5, \"sales\": 12.5}, {\"TV\": 136.2, \"radio\": 19.2, \"newspaper\": 16.6, \"sales\": 13.2}, {\"TV\": 172.5, \"radio\": 18.1, \"newspaper\": 30.7, \"sales\": 14.4}, {\"TV\": 17.2, \"radio\": 4.1, \"newspaper\": 31.6, \"sales\": 5.9}, {\"TV\": 59.6, \"radio\": 12.0, \"newspaper\": 43.1, \"sales\": 9.7}, {\"TV\": 74.7, \"radio\": 49.4, \"newspaper\": 45.7, \"sales\": 14.7}, {\"TV\": 149.8, \"radio\": 1.3, \"newspaper\": 24.3, \"sales\": 10.1}, {\"TV\": 166.8, \"radio\": 42.0, \"newspaper\": 3.6, \"sales\": 19.6}, {\"TV\": 44.5, \"radio\": 39.3, \"newspaper\": 45.1, \"sales\": 10.4}, {\"TV\": 216.4, \"radio\": 41.7, \"newspaper\": 39.6, \"sales\": 22.6}, {\"TV\": 44.7, \"radio\": 25.8, \"newspaper\": 20.6, \"sales\": 10.1}, {\"TV\": 0.7, \"radio\": 39.6, \"newspaper\": 8.7, \"sales\": 1.6}, {\"TV\": 121.0, \"radio\": 8.4, \"newspaper\": 48.7, \"sales\": 11.6}, {\"TV\": 187.9, \"radio\": 17.2, \"newspaper\": 17.9, \"sales\": 14.7}, {\"TV\": 135.2, \"radio\": 41.7, \"newspaper\": 45.9, \"sales\": 17.2}, {\"TV\": 139.2, \"radio\": 14.3, \"newspaper\": 25.6, \"sales\": 12.2}, {\"TV\": 110.7, \"radio\": 40.6, \"newspaper\": 63.2, \"sales\": 16.0}, {\"TV\": 213.4, \"radio\": 24.6, \"newspaper\": 13.1, \"sales\": 17.0}, {\"TV\": 18.8, \"radio\": 21.7, \"newspaper\": 50.4, \"sales\": 7.0}, {\"TV\": 232.1, \"radio\": 8.6, \"newspaper\": 8.7, \"sales\": 13.4}, {\"TV\": 218.4, \"radio\": 27.7, \"newspaper\": 53.4, \"sales\": 18.0}, {\"TV\": 286.0, \"radio\": 13.9, \"newspaper\": 3.7, \"sales\": 15.9}, {\"TV\": 109.8, \"radio\": 14.3, \"newspaper\": 31.7, \"sales\": 12.4}, {\"TV\": 25.0, \"radio\": 11.0, \"newspaper\": 29.7, \"sales\": 7.2}, {\"TV\": 204.1, \"radio\": 32.9, \"newspaper\": 46.0, \"sales\": 19.0}, {\"TV\": 217.7, \"radio\": 33.5, \"newspaper\": 59.0, \"sales\": 19.4}, 
{\"TV\": 165.6, \"radio\": 10.0, \"newspaper\": 17.6, \"sales\": 12.6}, {\"TV\": 280.2, \"radio\": 10.1, \"newspaper\": 21.4, \"sales\": 14.8}]}}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.RepeatChart(...)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# scatter-plot matrix of the training data: each listed variable against every other\n",
    "pairplot_vars = ['sales', 'TV', 'radio', 'newspaper']\n",
    "\n",
    "(\n",
    "    alt.Chart(df_train)\n",
    "    .mark_circle()\n",
    "    .encode(\n",
    "        alt.X(alt.repeat(\"column\"), type='quantitative'),\n",
    "        alt.Y(alt.repeat(\"row\"), type='quantitative'),\n",
    "    )\n",
    "    .properties(width=150, height=150)\n",
    "    .repeat(row=pairplot_vars, column=pairplot_vars)\n",
    "    .interactive()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "sales        1.000000\n",
       "TV           0.768874\n",
       "radio        0.592373\n",
       "newspaper    0.237874\n",
       "Name: sales, dtype: float64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# correlation matrix of the training data\n",
    "corr = df_train.corr()\n",
    "\n",
    "# correlations with the outcome, strongest first\n",
    "corr.loc[:, y_label].sort_values(ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style type=\"text/css\">\n",
       "#T_098cb_row0_col0, #T_098cb_row1_col1, #T_098cb_row2_col2, #T_098cb_row3_col3 {\n",
       "  background-color: #08306b;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_098cb_row0_col1, #T_098cb_row0_col2, #T_098cb_row2_col0, #T_098cb_row2_col3 {\n",
       "  background-color: #f7fbff;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_098cb_row0_col3 {\n",
       "  background-color: #2f7fbc;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_098cb_row1_col0 {\n",
       "  background-color: #f0f6fd;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_098cb_row1_col2 {\n",
       "  background-color: #9dcae1;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_098cb_row1_col3 {\n",
       "  background-color: #79b5d9;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_098cb_row2_col1 {\n",
       "  background-color: #a5cde3;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_098cb_row3_col0 {\n",
       "  background-color: #1e6db2;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_098cb_row3_col1 {\n",
       "  background-color: #549fcd;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_098cb_row3_col2 {\n",
       "  background-color: #cbdef1;\n",
       "  color: #000000;\n",
       "}\n",
       "</style>\n",
       "<table id=\"T_098cb\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th class=\"blank level0\" >&nbsp;</th>\n",
       "      <th id=\"T_098cb_level0_col0\" class=\"col_heading level0 col0\" >TV</th>\n",
       "      <th id=\"T_098cb_level0_col1\" class=\"col_heading level0 col1\" >radio</th>\n",
       "      <th id=\"T_098cb_level0_col2\" class=\"col_heading level0 col2\" >newspaper</th>\n",
       "      <th id=\"T_098cb_level0_col3\" class=\"col_heading level0 col3\" >sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th id=\"T_098cb_level0_row0\" class=\"row_heading level0 row0\" >TV</th>\n",
       "      <td id=\"T_098cb_row0_col0\" class=\"data row0 col0\" >1.000000</td>\n",
       "      <td id=\"T_098cb_row0_col1\" class=\"data row0 col1\" >0.053872</td>\n",
       "      <td id=\"T_098cb_row0_col2\" class=\"data row0 col2\" >0.019084</td>\n",
       "      <td id=\"T_098cb_row0_col3\" class=\"data row0 col3\" >0.768874</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_098cb_level0_row1\" class=\"row_heading level0 row1\" >radio</th>\n",
       "      <td id=\"T_098cb_row1_col0\" class=\"data row1 col0\" >0.053872</td>\n",
       "      <td id=\"T_098cb_row1_col1\" class=\"data row1 col1\" >1.000000</td>\n",
       "      <td id=\"T_098cb_row1_col2\" class=\"data row1 col2\" >0.388074</td>\n",
       "      <td id=\"T_098cb_row1_col3\" class=\"data row1 col3\" >0.592373</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_098cb_level0_row2\" class=\"row_heading level0 row2\" >newspaper</th>\n",
       "      <td id=\"T_098cb_row2_col0\" class=\"data row2 col0\" >0.019084</td>\n",
       "      <td id=\"T_098cb_row2_col1\" class=\"data row2 col1\" >0.388074</td>\n",
       "      <td id=\"T_098cb_row2_col2\" class=\"data row2 col2\" >1.000000</td>\n",
       "      <td id=\"T_098cb_row2_col3\" class=\"data row2 col3\" >0.237874</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_098cb_level0_row3\" class=\"row_heading level0 row3\" >sales</th>\n",
       "      <td id=\"T_098cb_row3_col0\" class=\"data row3 col0\" >0.768874</td>\n",
       "      <td id=\"T_098cb_row3_col1\" class=\"data row3 col1\" >0.592373</td>\n",
       "      <td id=\"T_098cb_row3_col2\" class=\"data row3 col2\" >0.237874</td>\n",
       "      <td id=\"T_098cb_row3_col3\" class=\"data row3 col3\" >1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x182be9be0>"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# full correlation matrix, shaded so that darker cells mark larger values\n",
    "corr.style.background_gradient(cmap=\"Blues\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Select model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# linear regression estimator with scikit-learn's default settings\n",
    "reg = LinearRegression()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Training and validation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5-fold cross-validation on the training data\n",
    "# the scorer yields the negated MSE, so flip the sign to get one MSE per fold\n",
    "scores = -cross_val_score(\n",
    "    reg, X_train, y_train, cv=5, scoring='neg_mean_squared_error'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style type=\"text/css\">\n",
       "#T_feb68_row0_col0 {\n",
       "  background-color: #084184;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "#T_feb68_row1_col0 {\n",
       "  background-color: #f7fbff;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_feb68_row2_col0 {\n",
       "  background-color: #cde0f1;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_feb68_row3_col0 {\n",
       "  background-color: #a3cce3;\n",
       "  color: #000000;\n",
       "}\n",
       "#T_feb68_row4_col0 {\n",
       "  background-color: #08306b;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "</style>\n",
       "<table id=\"T_feb68\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th class=\"blank level0\" >&nbsp;</th>\n",
       "      <th id=\"T_feb68_level0_col0\" class=\"col_heading level0 col0\" >lr</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th id=\"T_feb68_level0_row0\" class=\"row_heading level0 row0\" >1</th>\n",
       "      <td id=\"T_feb68_row0_col0\" class=\"data row0 col0\" >4.192954</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_feb68_level0_row1\" class=\"row_heading level0 row1\" >2</th>\n",
       "      <td id=\"T_feb68_row1_col0\" class=\"data row1 col0\" >1.500644</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_feb68_level0_row2\" class=\"row_heading level0 row2\" >3</th>\n",
       "      <td id=\"T_feb68_row2_col0\" class=\"data row2 col0\" >2.109080</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_feb68_level0_row3\" class=\"row_heading level0 row3\" >4</th>\n",
       "      <td id=\"T_feb68_row3_col0\" class=\"data row3 col0\" >2.541355</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_feb68_level0_row4\" class=\"row_heading level0 row4\" >5</th>\n",
       "      <td id=\"T_feb68_row4_col0\" class=\"data row4 col0\" >4.372931</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x18460a400>"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# keep the per-fold MSE values in a one-column dataframe\n",
    "df_scores = pd.DataFrame(scores, columns=[\"lr\"])\n",
    "df_scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# number the rows 1..k so the index matches the fold number\n",
    "# (assigned rather than incremented, so re-running this cell cannot shift the index again)\n",
    "df_scores.index = pd.RangeIndex(start=1, stop=len(df_scores) + 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# show the fold scores, shaded so that darker cells mark larger errors\n",
    "df_scores.style.background_gradient(cmap=\"Blues\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<div id=\"altair-viz-537abc3e3fdd41a3ba96a2378f6917dd\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-537abc3e3fdd41a3ba96a2378f6917dd\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-537abc3e3fdd41a3ba96a2378f6917dd\");\n",
       "    }\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm//vega-lite@4.17.0?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"5\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"4.17.0\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-aadd69d2c17db71c412d2efefb460a35\"}, \"mark\": {\"type\": \"line\", \"point\": {}}, \"encoding\": {\"x\": {\"axis\": {\"tickCount\": 5}, \"bin\": false, \"field\": \"index\", \"title\": \"Fold\", \"type\": \"quantitative\"}, \"y\": {\"aggregate\": \"mean\", \"field\": \"lr\", \"title\": \"Mean squared error (MSE)\", \"type\": \"quantitative\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-aadd69d2c17db71c412d2efefb460a35\": [{\"index\": 1, \"lr\": 4.192953526576346}, {\"index\": 2, \"lr\": 1.5006440155719944}, {\"index\": 3, \"lr\": 2.1090796685677944}, {\"index\": 4, \"lr\": 2.5413551427273604}, {\"index\": 5, \"lr\": 4.372930659506719}]}}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.Chart(...)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# line chart of the MSE per fold; reset_index() exposes the fold number as column \"index\"\n",
    "fold_axis = alt.X(\"index\", bin=False, title=\"Fold\", axis=alt.Axis(tickCount=5))\n",
    "mse_axis = alt.Y(\"lr\", aggregate=\"mean\", title=\"Mean squared error (MSE)\")\n",
    "\n",
    "alt.Chart(df_scores.reset_index()).mark_line(\n",
    "    point=alt.OverlayMarkDef()\n",
    ").encode(x=fold_axis, y=mse_axis)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>lr</th>\n",
       "      <td>5.0</td>\n",
       "      <td>2.943393</td>\n",
       "      <td>1.279083</td>\n",
       "      <td>1.500644</td>\n",
       "      <td>2.10908</td>\n",
       "      <td>2.541355</td>\n",
       "      <td>4.192954</td>\n",
       "      <td>4.372931</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    count      mean       std       min      25%       50%       75%       max\n",
       "lr    5.0  2.943393  1.279083  1.500644  2.10908  2.541355  4.192954  4.372931"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# summary statistics of the cross-validation MSE, one row per score column\n",
    "df_scores.describe().transpose()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Fit model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LinearRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LinearRegression</label><div class=\"sk-toggleable__content\"><pre>LinearRegression()</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "LinearRegression()"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# train the final model on the complete training data\n",
    "reg.fit(X=X_train, y=y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Coefficients"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Coefficient</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Intercept</td>\n",
       "      <td>2.979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>TV</td>\n",
       "      <td>0.045</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>radio</td>\n",
       "      <td>0.189</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>newspaper</td>\n",
       "      <td>0.003</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Name  Coefficient\n",
       "0  Intercept        2.979\n",
       "1         TV        0.045\n",
       "2      radio        0.189\n",
       "3  newspaper        0.003"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# one-row frame holding the intercept\n",
    "intercept = pd.DataFrame({\"Name\": [\"Intercept\"], \"Coefficient\": [reg.intercept_]})\n",
    "\n",
    "# one row per feature with its estimated slope\n",
    "slope = pd.DataFrame({\"Name\": features, \"Coefficient\": reg.coef_})\n",
    "\n",
    "# stack intercept and slopes into a single table with a fresh 0..n index\n",
    "table = pd.concat([intercept, slope], ignore_index=True, sort=False)\n",
    "\n",
    "table.round(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Evaluation on test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [],
   "source": [
    "# predictions of the fitted model for the test-set features\n",
    "y_pred = reg.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.899"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# R squared on the test set\n",
    "# built-in round() works whether the metric comes back as a NumPy scalar or a plain float\n",
    "round(r2_score(y_test, y_pred), 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3.174"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# MSE on the test set\n",
    "# built-in round() works whether the metric comes back as a NumPy scalar or a plain float\n",
    "round(mean_squared_error(y_test, y_pred), 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.782"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# RMSE on the test set: square root of the MSE\n",
    "# (computed directly because the `squared` argument of mean_squared_error\n",
    "#  was deprecated and later removed from scikit-learn)\n",
    "round(float(np.sqrt(mean_squared_error(y_test, y_pred))), 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.461"
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# MAE on the test set\n",
    "# built-in round() works whether the metric comes back as a NumPy scalar or a plain float\n",
    "round(mean_absolute_error(y_test, y_pred), 3)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.12 ('base')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "463226f144cc21b006ce6927bfc93dd00694e52c8bc6857abb6e555b983749e9"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}