{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# EDA on Sentiment Data\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:ekorpkit.base:IPython version: (6, 9, 0), client: jupyter_client\n", "INFO:ekorpkit.base:Google Colab not detected.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "version: 0.1.35+0.g69734d6.dirty\n", "is notebook? True\n", "is colab? False\n", "environment variables:\n", "{'CUDA_DEVICE_ORDER': None,\n", " 'CUDA_VISIBLE_DEVICES': None,\n", " 'EKORPKIT_CONFIG_DIR': '/workspace/projects/ekorpkit-book/config',\n", " 'EKORPKIT_DATA_DIR': None,\n", " 'EKORPKIT_LOG_LEVEL': 'WARNING',\n", " 'EKORPKIT_PROJECT': 'ekorpkit-book',\n", " 'EKORPKIT_WORKSPACE_ROOT': '/workspace',\n", " 'KMP_DUPLICATE_LIB_OK': 'TRUE',\n", " 'NUM_WORKERS': 230}\n" ] } ], "source": [ "%config InlineBackend.figure_format='retina'\n", "from ekorpkit import eKonf\n", "\n", "eKonf.setLogger(\"WARNING\")\n", "print(\"version:\", eKonf.__version__)\n", "print(\"is notebook?\", eKonf.is_notebook())\n", "print(\"is colab?\", eKonf.is_colab())\n", "print(\"environment variables:\")\n", "eKonf.print(eKonf.env().dict())" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pydantic.types.SecretStr" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "start_year = 1999\n", "data_dir = \"../data/fomc\"\n", "eKonf.env().FRED_API_KEY" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Load datasets\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | polarity_mean_beigebook | \n", "polarity_mean_meeting_script | \n", "polarity_mean_minutes | \n", "polarity_mean_press_conf | \n", "polarity_mean_speech | \n", "polarity_mean_statement | \n", "polarity_mean_testimony | \n", "polarity_diffusion_beigebook | \n", "polarity_diffusion_meeting_script | \n", "polarity_diffusion_minutes | \n", "... | \n", "num_tokens_sum_speech | \n", "num_tokens_sum_statement | \n", "num_tokens_sum_testimony | \n", "num_tokens_mean_beigebook | \n", "num_tokens_mean_meeting_script | \n", "num_tokens_mean_minutes | \n", "num_tokens_mean_press_conf | \n", "num_tokens_mean_speech | \n", "num_tokens_mean_statement | \n", "num_tokens_mean_testimony | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
date | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
1990-02-07 | \n", "NaN | \n", "-0.087583 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "-0.095663 | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "30.213010 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1990-03-27 | \n", "NaN | \n", "-0.171992 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "-0.179702 | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "29.846369 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1990-05-15 | \n", "NaN | \n", "-0.116052 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "-0.125461 | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "29.749077 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1990-07-03 | \n", "NaN | \n", "-0.114829 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "-0.117794 | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "29.667920 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1990-08-21 | \n", "NaN | \n", "-0.209552 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "-0.219403 | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "31.032836 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2021-11-30 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "-0.167014 | \n", "NaN | \n", "-0.12 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "3066.0 | \n", "NaN | \n", "556.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "31.937500 | \n", "NaN | \n", "27.8 | \n", "
2021-12-01 | \n", "-0.046022 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "-0.048109 | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "22.539497 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2021-12-02 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "-0.077381 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "6514.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "36.188889 | \n", "NaN | \n", "NaN | \n", "
2021-12-15 | \n", "NaN | \n", "NaN | \n", "-0.043929 | \n", "-0.075441 | \n", "NaN | \n", "0.166667 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "-0.064286 | \n", "... | \n", "NaN | \n", "489.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "30.521429 | \n", "37.587413 | \n", "NaN | \n", "27.166667 | \n", "NaN | \n", "
2021-12-17 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "-0.356613 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "3694.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "29.317460 | \n", "NaN | \n", "NaN | \n", "
1876 rows × 35 columns
\n", "