{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Visualize Features\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:ekorpkit.base:IPython version: (6, 9, 0), client: jupyter_client\n", "INFO:ekorpkit.base:Google Colab not detected.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "version: 0.1.35+0.g69734d6.dirty\n", "is notebook? True\n", "is colab? False\n", "environment variables:\n", "{'CUDA_DEVICE_ORDER': None,\n", " 'CUDA_VISIBLE_DEVICES': None,\n", " 'EKORPKIT_CONFIG_DIR': '/workspace/projects/ekorpkit-book/config',\n", " 'EKORPKIT_DATA_DIR': None,\n", " 'EKORPKIT_LOG_LEVEL': 'WARNING',\n", " 'EKORPKIT_PROJECT': 'ekorpkit-book',\n", " 'EKORPKIT_WORKSPACE_ROOT': '/workspace',\n", " 'KMP_DUPLICATE_LIB_OK': 'TRUE',\n", " 'NUM_WORKERS': 230}\n" ] } ], "source": [ "# Use high-resolution (retina) inline figures.\n", "%config InlineBackend.figure_format='retina'\n", "from ekorpkit import eKonf\n", "\n", "eKonf.setLogger(\"WARNING\")\n", "# Report the ekorpkit version and the detected runtime (notebook / Colab).\n", "print(\"version:\", eKonf.__version__)\n", "print(\"is notebook?\", eKonf.is_notebook())\n", "print(\"is colab?\", eKonf.is_colab())\n", "# Show the environment settings exposed by eKonf.env().\n", "print(\"environment variables:\")\n", "eKonf.print(eKonf.env().dict())" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pydantic.types.SecretStr" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Notebook-level settings (presumably consumed by later cells -- confirm).\n", "start_year = 1999\n", "data_dir = \"../data/fomc\"\n", "# NOTE(review): the recorded output is `pydantic.types.SecretStr`; make sure\n", "# this expression never renders the raw API key before sharing the notebook.\n", "eKonf.env().FRED_API_KEY\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Build and load a feature set with tones\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | target | \n", "prev_decision | \n", "GDP_diff_prev | \n", "PMI | \n", "EMP_diff_prev | \n", "RSALES_diff_year | \n", "UNEMP_diff_prev | \n", "HSALES_diff_year | \n", "Inertia_diff | \n", "Balanced_diff | \n", "
---|---|---|---|---|---|---|---|---|---|---|
date | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
1982-10-05 | \n", "Cut | \n", "0.0 | \n", "0.456199 | \n", "38.8 | \n", "-0.201426 | \n", "2.094256 | \n", "3.061224 | \n", "42.307692 | \n", "0.0 | \n", "0.0 | \n", "
1982-11-16 | \n", "Cut | \n", "-1.0 | \n", "-0.382299 | \n", "39.4 | \n", "-0.309476 | \n", "2.094256 | \n", "2.970297 | \n", "34.831461 | \n", "0.0 | \n", "0.0 | \n", "
1982-12-21 | \n", "Hold | \n", "-1.0 | \n", "-0.382299 | \n", "39.2 | \n", "-0.136097 | \n", "2.094256 | \n", "3.846154 | \n", "45.026178 | \n", "0.0 | \n", "0.0 | \n", "
1983-01-14 | \n", "Hold | \n", "0.0 | \n", "-0.382299 | \n", "42.8 | \n", "-0.016895 | \n", "2.094256 | \n", "0.000000 | \n", "14.004376 | \n", "0.0 | \n", "0.0 | \n", "
1983-01-21 | \n", "Hold | \n", "0.0 | \n", "-0.382299 | \n", "42.8 | \n", "-0.016895 | \n", "2.094256 | \n", "0.000000 | \n", "14.004376 | \n", "0.0 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2021-11-03 | \n", "Hold | \n", "0.0 | \n", "0.570948 | \n", "60.5 | \n", "0.288624 | \n", "8.474656 | \n", "-9.615385 | \n", "-26.135217 | \n", "0.0 | \n", "0.0 | \n", "
2021-12-15 | \n", "Hold | \n", "0.0 | \n", "0.570948 | \n", "60.6 | \n", "0.437147 | \n", "10.977142 | \n", "-8.695652 | \n", "-11.163337 | \n", "0.0 | \n", "0.0 | \n", "
2022-01-26 | \n", "Hold | \n", "0.0 | \n", "0.570948 | \n", "58.8 | \n", "0.395555 | \n", "9.101289 | \n", "-7.142857 | \n", "-3.673938 | \n", "0.0 | \n", "0.0 | \n", "
2022-03-16 | \n", "Hike | \n", "0.0 | \n", "1.680778 | \n", "58.6 | \n", "0.476814 | \n", "9.076698 | \n", "-5.000000 | \n", "3.125000 | \n", "0.0 | \n", "0.0 | \n", "
2022-05-04 | \n", "Hike | \n", "1.0 | \n", "-0.355417 | \n", "57.1 | \n", "0.283658 | \n", "-0.034915 | \n", "0.000000 | \n", "-26.946848 | \n", "0.0 | \n", "0.0 | \n", "
415 rows × 10 columns
\n", "\n", " | target | \n", "prev_decision | \n", "GDP_diff_prev | \n", "PMI | \n", "EMP_diff_prev | \n", "RSALES_diff_year | \n", "UNEMP_diff_prev | \n", "HSALES_diff_year | \n", "Inertia_diff | \n", "Balanced_diff | \n", "... | \n", "polarity_diffusion_statement | \n", "finbert_diffusion_minutes | \n", "finbert_diffusion_speech | \n", "finbert_diffusion_statement | \n", "t5_diffusion_minutes | \n", "t5_diffusion_speech | \n", "t5_diffusion_statement | \n", "lm_tones | \n", "finbert_tones | \n", "t5_tones | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
date | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
1999-02-03 | \n", "Hold | \n", "0.0 | \n", "1.616191 | \n", "46.8 | \n", "0.286880 | \n", "4.952373 | \n", "0.000000 | \n", "19.672131 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "-0.363636 | \n", "0.662069 | \n", "0.222222 | \n", "0.636364 | \n", "0.420690 | \n", "0.111111 | \n", "0.136364 | \n", "-0.243343 | \n", "0.506885 | \n", "0.222721 | \n", "
1999-03-30 | \n", "Hold | \n", "0.0 | \n", "1.616191 | \n", "51.7 | \n", "0.327325 | \n", "5.932944 | \n", "2.325581 | \n", "-2.078522 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "-0.363636 | \n", "0.570048 | \n", "0.381295 | \n", "0.636364 | \n", "0.405797 | \n", "0.187050 | \n", "0.136364 | \n", "-0.241290 | \n", "0.529236 | \n", "0.243070 | \n", "
1999-05-18 | \n", "Hold | \n", "0.0 | \n", "0.943827 | \n", "52.3 | \n", "0.288551 | \n", "4.601659 | \n", "2.380952 | \n", "6.004619 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "-0.363636 | \n", "0.719424 | \n", "0.383234 | \n", "0.636364 | \n", "0.474820 | \n", "0.245509 | \n", "0.136364 | \n", "-0.112051 | \n", "0.579674 | \n", "0.285564 | \n", "
1999-06-30 | \n", "Hike | \n", "0.0 | \n", "0.943827 | \n", "54.3 | \n", "0.164078 | \n", "5.248177 | \n", "-2.325581 | \n", "0.112740 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.000000 | \n", "0.767606 | \n", "0.097561 | \n", "1.000000 | \n", "0.514085 | \n", "0.097561 | \n", "0.625000 | \n", "-0.050040 | \n", "0.621722 | \n", "0.412215 | \n", "
1999-08-24 | \n", "Hike | \n", "1.0 | \n", "0.835000 | \n", "53.6 | \n", "0.251764 | \n", "6.166822 | \n", "0.000000 | \n", "2.739726 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "-0.346154 | \n", "0.773810 | \n", "0.178947 | \n", "0.692308 | \n", "0.583333 | \n", "0.178947 | \n", "0.269231 | \n", "-0.135226 | \n", "0.548355 | \n", "0.343837 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2021-06-16 | \n", "Hold | \n", "0.0 | \n", "1.533890 | \n", "61.6 | \n", "0.308928 | \n", "22.314413 | \n", "-3.333333 | \n", "4.815864 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.307692 | \n", "0.666667 | \n", "0.464567 | \n", "0.615385 | \n", "0.407407 | \n", "0.330709 | \n", "0.384615 | \n", "0.086359 | \n", "0.582206 | \n", "0.374244 | \n", "
2021-07-28 | \n", "Hold | \n", "0.0 | \n", "1.533890 | \n", "60.9 | \n", "0.383765 | \n", "13.352829 | \n", "1.724138 | \n", "-22.559653 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.384615 | \n", "0.640138 | \n", "0.157895 | \n", "0.461538 | \n", "0.456747 | \n", "0.228070 | \n", "0.615385 | \n", "0.167826 | \n", "0.419857 | \n", "0.433401 | \n", "
2021-09-22 | \n", "Hold | \n", "0.0 | \n", "1.640747 | \n", "59.7 | \n", "0.353173 | \n", "9.952513 | \n", "-3.703704 | \n", "-33.783784 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.461538 | \n", "0.611842 | \n", "0.172840 | \n", "0.538462 | \n", "0.414474 | \n", "0.148148 | \n", "0.538462 | \n", "0.176087 | \n", "0.441048 | \n", "0.367028 | \n", "
2021-11-03 | \n", "Hold | \n", "0.0 | \n", "0.570948 | \n", "60.5 | \n", "0.288624 | \n", "8.474656 | \n", "-9.615385 | \n", "-26.135217 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.428571 | \n", "0.651163 | \n", "0.371429 | \n", "0.571429 | \n", "0.395349 | \n", "0.085714 | \n", "0.571429 | \n", "-0.037468 | \n", "0.531340 | \n", "0.350831 | \n", "
2021-12-15 | \n", "Hold | \n", "0.0 | \n", "0.570948 | \n", "60.6 | \n", "0.437147 | \n", "10.977142 | \n", "-8.695652 | \n", "-11.163337 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.176471 | \n", "0.706383 | \n", "0.338889 | \n", "0.647059 | \n", "0.476596 | \n", "0.250000 | \n", "0.705882 | \n", "-0.001460 | \n", "0.564110 | \n", "0.477493 | \n", "
219 rows × 22 columns
\n", "\n", " | date | \n", "target | \n", "prev_decision | \n", "GDP_diff_prev | \n", "PMI | \n", "EMP_diff_prev | \n", "RSALES_diff_year | \n", "UNEMP_diff_prev | \n", "HSALES_diff_year | \n", "Inertia_diff | \n", "... | \n", "finbert_diffusion_minutes | \n", "finbert_diffusion_speech | \n", "finbert_diffusion_statement | \n", "t5_diffusion_minutes | \n", "t5_diffusion_speech | \n", "t5_diffusion_statement | \n", "lm_tones | \n", "finbert_tones | \n", "t5_tones | \n", "split | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
index | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
0 | \n", "2010-04-28 | \n", "2 | \n", "0.0 | \n", "1.067962 | \n", "58.8 | \n", "0.139555 | \n", "5.061896 | \n", "1.020408 | \n", "12.389381 | \n", "0.0 | \n", "... | \n", "0.677083 | \n", "0.117647 | \n", "0.583333 | \n", "0.359375 | \n", "0.117647 | \n", "0.333333 | \n", "-0.050143 | \n", "0.459355 | \n", "0.270118 | \n", "train | \n", "
1 | \n", "2008-10-29 | \n", "0 | \n", "-1.0 | \n", "0.572295 | \n", "44.8 | \n", "-0.335245 | \n", "-6.288802 | \n", "0.000000 | \n", "-36.880466 | \n", "0.0 | \n", "... | \n", "0.628378 | \n", "0.388889 | \n", "0.791667 | \n", "0.283784 | \n", "0.126984 | \n", "0.208333 | \n", "-0.297673 | \n", "0.602978 | \n", "0.206367 | \n", "train | \n", "
2 | \n", "2018-12-19 | \n", "1 | \n", "0.0 | \n", "0.481953 | \n", "58.8 | \n", "0.068241 | \n", "1.349789 | \n", "0.000000 | \n", "-15.611814 | \n", "0.0 | \n", "... | \n", "0.650206 | \n", "0.689394 | \n", "0.444444 | \n", "0.362140 | \n", "0.378788 | \n", "0.333333 | \n", "-0.017770 | \n", "0.594681 | \n", "0.358087 | \n", "train | \n", "
3 | \n", "2009-08-12 | \n", "2 | \n", "0.0 | \n", "-0.169238 | \n", "49.9 | \n", "-0.259526 | \n", "-7.390883 | \n", "0.000000 | \n", "-13.836478 | \n", "0.0 | \n", "... | \n", "0.676806 | \n", "0.458333 | \n", "0.545455 | \n", "0.334601 | \n", "0.208333 | \n", "0.454545 | \n", "-0.117091 | \n", "0.560198 | \n", "0.332493 | \n", "train | \n", "
4 | \n", "2000-11-15 | \n", "2 | \n", "0.0 | \n", "0.099774 | \n", "48.7 | \n", "-0.001511 | \n", "2.243324 | \n", "0.000000 | \n", "6.995413 | \n", "0.0 | \n", "... | \n", "0.752381 | \n", "0.642276 | \n", "0.842105 | \n", "0.466667 | \n", "0.471545 | \n", "0.736842 | \n", "0.016816 | \n", "0.745588 | \n", "0.558351 | \n", "train | \n", "
5 rows × 24 columns
\n", "