Upload TF-IDF Logistic Regression baseline model

Browse files

Files changed (5) hide show

.gitattributes +1 -0
fuzzy_match_training_data.ipynb +512 -0
fuzzy_matched_chunks.csv +3 -0
label_encoder.joblib +2 -2
model_pipeline.joblib +2 -2

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+fuzzy_matched_chunks.csv filter=lfs diff=lfs merge=lfs -text

fuzzy_match_training_data.ipynb ADDED Viewed

	@@ -0,0 +1,512 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "8d1fae73",
+   "metadata": {},
+   "source": [
+    "This notebook fuzzy-matches the manually extracted quotes against the chunked PDF text and flags each chunk with a boolean `is_target_quote` label, so that the unmatched chunks give us a more varied negative class."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "9ced7f63",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\Derik\\anaconda3\\envs\\NDC_extraction_ENV\\lib\\site-packages\\fuzzywuzzy\\fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning\n",
+      "  warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "from fuzzywuzzy import process\n",
+    "from fuzzywuzzy import fuzz\n",
+    "\n",
+    "from tqdm.notebook import tqdm, IProgress "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "e06e72c9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Column of the chunked-PDF sheet that holds each chunk's text.\n",
+    "CHUNK_TEXT_COLUMN = 'text'\n",
+    "# Column of the manually extracted sheet that holds the quoted passage.\n",
+    "QUOTE_TEXT_COLUMN = 'Quote or table'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "e467b9cd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Minimum fuzzy-match score a chunk needs to be labelled as matching a quote.\n",
+    "FUZZY_MATCH_THRESHOLD = 85\n",
+    "\n",
+    "# Load both workbooks, dropping fully duplicated rows from each sheet.\n",
+    "try:\n",
+    "    chunked_pdfs_df = pd.read_excel('../../etl/20250409_pdf_extraction_results.xlsx', sheet_name='Sheet1').drop_duplicates()\n",
+    "    extracted_quotes_df = pd.read_excel('../NDC_scraping_stage_1.xlsx', sheet_name='Prev_Finance').drop_duplicates()\n",
+    "except Exception as e:\n",
+    "    # Chain explicitly so the original error (missing file, bad sheet name, ...) stays attached as the cause.\n",
+    "    raise RuntimeError(f\"An unexpected error occurred while loading DataFrames: {e}\") from e\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "48d2f1d5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fail fast if either sheet is missing the text column that the later cells read.\n",
+    "if CHUNK_TEXT_COLUMN not in chunked_pdfs_df.columns:\n",
+    "    raise ValueError(f\"Error: Chunk text column '{CHUNK_TEXT_COLUMN}' not found in 'chunked_pdfs_df'.\")\n",
+    "if QUOTE_TEXT_COLUMN not in extracted_quotes_df.columns:\n",
+    "    raise ValueError(f\"Error: Quote text column '{QUOTE_TEXT_COLUMN}' not found in 'extracted_quotes_df'.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "8063a230",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Candidate quotes for fuzzy matching: non-missing, non-blank, unique strings.\n",
+    "# - Missing cells are dropped and values are cast to str, because the string scorer expects text.\n",
+    "# - Repeated quote texts are collapsed (first occurrence kept, order preserved): the earlier\n",
+    "#   drop_duplicates() only removes fully identical rows, and every chunk is compared against\n",
+    "#   every candidate.\n",
+    "all_quotes = (\n",
+    "    extracted_quotes_df[QUOTE_TEXT_COLUMN]\n",
+    "    .dropna()\n",
+    "    .astype(str)\n",
+    "    .loc[lambda quotes: quotes.str.strip().ne('')]\n",
+    "    .drop_duplicates()\n",
+    "    .tolist()\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "bdc824af",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "703b233b5adf4465825b90883d1dcafe",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Fuzzy Matching Chunks:   0%|          | 0/60128 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: ')']\n",
+      "WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: '.']\n",
+      "WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: ';']\n",
+      "WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: '.']\n",
+      "WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: '.']\n",
+      "WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: ',']\n",
+      "WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: ').']\n",
+      "WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: '/']\n",
+      "WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: '.']\n",
+      "WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: '.']\n",
+      "WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: '.']\n",
+      "WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: '.']\n",
+      "WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: '.']\n",
+      "WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: '.']\n",
+      "WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: '.']\n",
+      "WARNING:root:Applied processor reduces input query to empty string, all comparisons will have score 0. [Query: ').']\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Fuzzy matching complete.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Label every chunk by its best fuzzy match among the extracted quotes.\n",
+    "# Results are gathered in lists and written back as whole columns once, rather than\n",
+    "# issuing three scalar `.loc` writes for every matching row.\n",
+    "# NOTE(review): fuzz.token_set_ratio scores 100 when one side's token set is contained in\n",
+    "# the other's, so very short quotes or chunks may yield spurious positives - spot-check them.\n",
+    "is_target_flags = []\n",
+    "matched_quotes = []\n",
+    "match_scores = []\n",
+    "\n",
+    "for chunk_value in tqdm(chunked_pdfs_df[CHUNK_TEXT_COLUMN], total=len(chunked_pdfs_df), desc=\"Fuzzy Matching Chunks\"):\n",
+    "    best_match_tuple = None\n",
+    "\n",
+    "    # Skip missing text: str(NaN) would otherwise be matched as the literal 'nan'.\n",
+    "    if not pd.isna(chunk_value):\n",
+    "        chunk_text = str(chunk_value)  # Convert to string to handle other non-string cell types\n",
+    "\n",
+    "        # Skip chunks with no letters, digits or underscores: the default processor reduces\n",
+    "        # them to an empty string, scores them 0 against everything and logs a warning each time.\n",
+    "        if any(ch.isalnum() or ch == '_' for ch in chunk_text):\n",
+    "            # score_cutoff makes extractOne return None unless the best score is >= the threshold.\n",
+    "            best_match_tuple = process.extractOne(\n",
+    "                chunk_text,\n",
+    "                all_quotes,\n",
+    "                scorer=fuzz.token_set_ratio,\n",
+    "                score_cutoff=FUZZY_MATCH_THRESHOLD,\n",
+    "            )\n",
+    "\n",
+    "    if best_match_tuple:\n",
+    "        is_target_flags.append(1)\n",
+    "        matched_quotes.append(best_match_tuple[0])\n",
+    "        match_scores.append(best_match_tuple[1])\n",
+    "    else:\n",
+    "        # Non-matching chunks keep the same defaults as before: 0 / None / 0.\n",
+    "        is_target_flags.append(0)\n",
+    "        matched_quotes.append(None)\n",
+    "        match_scores.append(0)\n",
+    "\n",
+    "# Positional assignment: one entry per row, in the frame's row order.\n",
+    "chunked_pdfs_df['is_target_quote'] = is_target_flags\n",
+    "chunked_pdfs_df['matched_quote'] = matched_quotes\n",
+    "chunked_pdfs_df['match_score'] = match_scores\n",
+    "print(\"Fuzzy matching complete.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "9c0e0d8d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>country</th>\n",
+       "      <th>filename</th>\n",
+       "      <th>filepath</th>\n",
+       "      <th>indicated_page</th>\n",
+       "      <th>chunk_num</th>\n",
+       "      <th>text</th>\n",
+       "      <th>contains_thematic_scope</th>\n",
+       "      <th>contains_coverage</th>\n",
+       "      <th>contains_Granularity</th>\n",
+       "      <th>is_target_quote</th>\n",
+       "      <th>matched_quote</th>\n",
+       "      <th>match_score</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>Afghanistan_First_NDC.pdf</td>\n",
+       "      <td>../data/raw/pdfs\\Afghanistan\\Afghanistan_First...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1 ISLAMIC REPUBLIC OF AFGHANISTAN Intended Nat...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>Afghanistan_First_NDC.pdf</td>\n",
+       "      <td>../data/raw/pdfs\\Afghanistan\\Afghanistan_First...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>its Intended Nationally Determined Contributio...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>Afghanistan_First_NDC.pdf</td>\n",
+       "      <td>../data/raw/pdfs\\Afghanistan\\Afghanistan_First...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>atural resource management, agriculture, waste...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Target Years: \\n2020 to 2030 \\nContribution Ty...</td>\n",
+       "      <td>98</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>Afghanistan_First_NDC.pdf</td>\n",
+       "      <td>../data/raw/pdfs\\Afghanistan\\Afghanistan_First...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>ss as usual (BAU) 2030 scenario, conditional o...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>Afghanistan_First_NDC.pdf</td>\n",
+       "      <td>../data/raw/pdfs\\Afghanistan\\Afghanistan_First...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>or Afghanistan showing 13.6% relative reductio...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>60123</th>\n",
+       "      <td>Zimbabwe</td>\n",
+       "      <td>Zimbabwe_NDC30_Country_Statement.pdf</td>\n",
+       "      <td>../data/raw/pdfs\\Zimbabwe\\Zimbabwe_NDC30_Count...</td>\n",
+       "      <td>40</td>\n",
+       "      <td>845</td>\n",
+       "      <td>ILDING, EDUCATION, TRAINING AND AWARENESS The ...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>60124</th>\n",
+       "      <td>Zimbabwe</td>\n",
+       "      <td>Zimbabwe_NDC30_Country_Statement.pdf</td>\n",
+       "      <td>../data/raw/pdfs\\Zimbabwe\\Zimbabwe_NDC30_Count...</td>\n",
+       "      <td>40</td>\n",
+       "      <td>846</td>\n",
+       "      <td>ious sectors. The enhanced integration of clim...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>60125</th>\n",
+       "      <td>Zimbabwe</td>\n",
+       "      <td>Zimbabwe_NDC30_Country_Statement.pdf</td>\n",
+       "      <td>../data/raw/pdfs\\Zimbabwe\\Zimbabwe_NDC30_Count...</td>\n",
+       "      <td>40</td>\n",
+       "      <td>847</td>\n",
+       "      <td>pacity building and innovation. In addition, t...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>60126</th>\n",
+       "      <td>Zimbabwe</td>\n",
+       "      <td>Zimbabwe_NDC30_Country_Statement.pdf</td>\n",
+       "      <td>../data/raw/pdfs\\Zimbabwe\\Zimbabwe_NDC30_Count...</td>\n",
+       "      <td>41</td>\n",
+       "      <td>848</td>\n",
+       "      <td>35 ZIMBABWE’S NDC3.0 COUNTRY STATEMENT</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>60127</th>\n",
+       "      <td>Zimbabwe</td>\n",
+       "      <td>Zimbabwe_NDC30_Country_Statement.pdf</td>\n",
+       "      <td>../data/raw/pdfs\\Zimbabwe\\Zimbabwe_NDC30_Count...</td>\n",
+       "      <td>42</td>\n",
+       "      <td>849</td>\n",
+       "      <td>36 ZIMBABWE’S NDC3.0 COUNTRY STATEMENT Ministr...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>60128 rows × 12 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           country                              filename  \\\n",
+       "0      Afghanistan             Afghanistan_First_NDC.pdf   \n",
+       "1      Afghanistan             Afghanistan_First_NDC.pdf   \n",
+       "2      Afghanistan             Afghanistan_First_NDC.pdf   \n",
+       "3      Afghanistan             Afghanistan_First_NDC.pdf   \n",
+       "4      Afghanistan             Afghanistan_First_NDC.pdf   \n",
+       "...            ...                                   ...   \n",
+       "60123     Zimbabwe  Zimbabwe_NDC30_Country_Statement.pdf   \n",
+       "60124     Zimbabwe  Zimbabwe_NDC30_Country_Statement.pdf   \n",
+       "60125     Zimbabwe  Zimbabwe_NDC30_Country_Statement.pdf   \n",
+       "60126     Zimbabwe  Zimbabwe_NDC30_Country_Statement.pdf   \n",
+       "60127     Zimbabwe  Zimbabwe_NDC30_Country_Statement.pdf   \n",
+       "\n",
+       "                                                filepath  indicated_page  \\\n",
+       "0      ../data/raw/pdfs\\Afghanistan\\Afghanistan_First...               1   \n",
+       "1      ../data/raw/pdfs\\Afghanistan\\Afghanistan_First...               1   \n",
+       "2      ../data/raw/pdfs\\Afghanistan\\Afghanistan_First...               1   \n",
+       "3      ../data/raw/pdfs\\Afghanistan\\Afghanistan_First...               1   \n",
+       "4      ../data/raw/pdfs\\Afghanistan\\Afghanistan_First...               1   \n",
+       "...                                                  ...             ...   \n",
+       "60123  ../data/raw/pdfs\\Zimbabwe\\Zimbabwe_NDC30_Count...              40   \n",
+       "60124  ../data/raw/pdfs\\Zimbabwe\\Zimbabwe_NDC30_Count...              40   \n",
+       "60125  ../data/raw/pdfs\\Zimbabwe\\Zimbabwe_NDC30_Count...              40   \n",
+       "60126  ../data/raw/pdfs\\Zimbabwe\\Zimbabwe_NDC30_Count...              41   \n",
+       "60127  ../data/raw/pdfs\\Zimbabwe\\Zimbabwe_NDC30_Count...              42   \n",
+       "\n",
+       "       chunk_num                                               text  \\\n",
+       "0              1  1 ISLAMIC REPUBLIC OF AFGHANISTAN Intended Nat...   \n",
+       "1              2  its Intended Nationally Determined Contributio...   \n",
+       "2              3  atural resource management, agriculture, waste...   \n",
+       "3              4  ss as usual (BAU) 2030 scenario, conditional o...   \n",
+       "4              5  or Afghanistan showing 13.6% relative reductio...   \n",
+       "...          ...                                                ...   \n",
+       "60123        845  ILDING, EDUCATION, TRAINING AND AWARENESS The ...   \n",
+       "60124        846  ious sectors. The enhanced integration of clim...   \n",
+       "60125        847  pacity building and innovation. In addition, t...   \n",
+       "60126        848             35 ZIMBABWE’S NDC3.0 COUNTRY STATEMENT   \n",
+       "60127        849  36 ZIMBABWE’S NDC3.0 COUNTRY STATEMENT Ministr...   \n",
+       "\n",
+       "       contains_thematic_scope  contains_coverage  contains_Granularity  \\\n",
+       "0                          NaN                NaN                   NaN   \n",
+       "1                          NaN                NaN                   NaN   \n",
+       "2                          NaN                NaN                   NaN   \n",
+       "3                          NaN                NaN                   NaN   \n",
+       "4                          NaN                NaN                   NaN   \n",
+       "...                        ...                ...                   ...   \n",
+       "60123                      NaN                NaN                   NaN   \n",
+       "60124                      NaN                NaN                   NaN   \n",
+       "60125                      NaN                NaN                   NaN   \n",
+       "60126                      NaN                NaN                   NaN   \n",
+       "60127                      NaN                NaN                   NaN   \n",
+       "\n",
+       "       is_target_quote                                      matched_quote  \\\n",
+       "0                    0                                               None   \n",
+       "1                    0                                               None   \n",
+       "2                    1  Target Years: \\n2020 to 2030 \\nContribution Ty...   \n",
+       "3                    0                                               None   \n",
+       "4                    0                                               None   \n",
+       "...                ...                                                ...   \n",
+       "60123                0                                               None   \n",
+       "60124                0                                               None   \n",
+       "60125                0                                               None   \n",
+       "60126                0                                               None   \n",
+       "60127                0                                               None   \n",
+       "\n",
+       "       match_score  \n",
+       "0                0  \n",
+       "1                0  \n",
+       "2               98  \n",
+       "3                0  \n",
+       "4                0  \n",
+       "...            ...  \n",
+       "60123            0  \n",
+       "60124            0  \n",
+       "60125            0  \n",
+       "60126            0  \n",
+       "60127            0  \n",
+       "\n",
+       "[60128 rows x 12 columns]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Show the labelled frame; pandas truncates the rendering to the first and last rows.\n",
+    "chunked_pdfs_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "9d7038e9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Persist the labelled chunks next to this notebook.\n",
+    "# NOTE(review): to_csv writes the DataFrame index as an unnamed first column by default;\n",
+    "# pass index=False if downstream readers do not rely on that column - confirm before changing.\n",
+    "chunked_pdfs_df.to_csv('./fuzzy_matched_chunks.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "76b51ab6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/c/Users/Derik/Desktop/NDC_Scraper/Classification Model/tf_idf_lr_model\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Debug check of the kernel's working directory, against which the relative paths above resolve.\n",
+    "# NOTE(review): the saved output exposes a local absolute path - consider clearing it before sharing.\n",
+    "!pwd"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "NDC_extraction_ENV",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.21"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

fuzzy_matched_chunks.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:783c1ee7c7b2ef9a44592e0f7e96e0b290ce88b2337eec0a42460e9ceb0c32fa
+size 23764705

label_encoder.joblib CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e37ba6e0f6dee4380507f091d596429ef0fda3c46a997f1798b207785b5247e
-size 335

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d2f1f6c74d9339461a40453974dcfcf407a5a78522cef40080d323128fd8f9b
+size 343

model_pipeline.joblib CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91e4edd8a43620a74decfe6e96b07d2a8f2934170e3e5c02c9a4a51291bd2e12
-size 1181168

 version https://git-lfs.github.com/spec/v1
+oid sha256:2cbe13d3943b9379411289561324e244a33fb2208e31912b8380a9896e557af2
+size 111587616