montebello-642
/

logistic-regression

Model card Files Files and versions

xet

Community

montebello-642 committed on Jan 13, 2024

Commit

f2a4ddd

verified ·

1 Parent(s): 6b4216c

Delete Logistic Regression.ipynb

Browse files

Files changed (1) hide show

Logistic Regression.ipynb +0 -264

Logistic Regression.ipynb DELETED Viewed

@@ -1,264 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Index(['duration_mo', 'mos_ethnicity', 'complainant_ethnicity', 'is_force',\n",
-      "       'is_abuse_of_authority', 'is_discourtesy', 'is_offensive_language',\n",
-      "       'outcome_description'],\n",
-      "      dtype='object')\n",
-      "   duration_mo  mos_ethnicity  complainant_ethnicity  is_force  \\\n",
-      "0           10              0                      2         0   \n",
-      "1            9              1                      2         0   \n",
-      "2            9              1                      2         1   \n",
-      "3           14              1                      2         0   \n",
-      "4            6              0                      7         0   \n",
-      "\n",
-      "   is_abuse_of_authority  is_discourtesy  is_offensive_language  \\\n",
-      "0                      1               0                      0   \n",
-      "1                      0               1                      0   \n",
-      "2                      0               0                      0   \n",
-      "3                      1               0                      0   \n",
-      "4                      0               0                      1   \n",
-      "\n",
-      "   outcome_description  \n",
-      "0                    0  \n",
-      "1                    0  \n",
-      "2                    0  \n",
-      "3                    0  \n",
-      "4                    1  \n",
-      "        duration_mo  mos_ethnicity  complainant_ethnicity      is_force  \\\n",
-      "count  33358.000000   33358.000000           33358.000000  33358.000000   \n",
-      "mean       9.733767       0.946819               2.468283      0.022573   \n",
-      "std        5.017703       0.754311               2.256281      0.148541   \n",
-      "min        0.000000       0.000000               0.000000      0.000000   \n",
-      "25%        6.000000       0.000000               1.000000      0.000000   \n",
-      "50%       10.000000       1.000000               2.000000      0.000000   \n",
-      "75%       13.000000       1.000000               2.000000      0.000000   \n",
-      "max      110.000000       4.000000               7.000000      1.000000   \n",
-      "\n",
-      "       is_abuse_of_authority  is_discourtesy  is_offensive_language  \\\n",
-      "count           33358.000000    33358.000000           33358.000000   \n",
-      "mean                0.608310        0.140206               0.228911   \n",
-      "std                 0.488135        0.347206               0.420138   \n",
-      "min                 0.000000        0.000000               0.000000   \n",
-      "25%                 0.000000        0.000000               0.000000   \n",
-      "50%                 1.000000        0.000000               0.000000   \n",
-      "75%                 1.000000        0.000000               0.000000   \n",
-      "max                 1.000000        1.000000               1.000000   \n",
-      "\n",
-      "       outcome_description  \n",
-      "count         33358.000000  \n",
-      "mean              0.438066  \n",
-      "std               0.496157  \n",
-      "min               0.000000  \n",
-      "25%               0.000000  \n",
-      "50%               0.000000  \n",
-      "75%               1.000000  \n",
-      "max               1.000000  \n",
-      "duration_mo              0\n",
-      "mos_ethnicity            0\n",
-      "complainant_ethnicity    0\n",
-      "is_force                 0\n",
-      "is_abuse_of_authority    0\n",
-      "is_discourtesy           0\n",
-      "is_offensive_language    0\n",
-      "outcome_description      0\n",
-      "dtype: int64\n",
-      "Accuracy: 0.65\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "           0       0.65      0.82      0.72      3778\n",
-      "           1       0.64      0.42      0.51      2894\n",
-      "\n",
-      "    accuracy                           0.65      6672\n",
-      "   macro avg       0.64      0.62      0.62      6672\n",
-      "weighted avg       0.64      0.65      0.63      6672\n",
-      "\n",
-      "Running on local URL:  http://127.0.0.1:7860\n",
-      "Running on public URL: https://d8846d114093b0894a.gradio.live\n",
-      "\n",
-      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": "<IPython.core.display.HTML object>",
-      "text/html": "<div><iframe src=\"https://d8846d114093b0894a.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": ""
-     },
-     "execution_count": 1,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "from sklearn.model_selection import train_test_split, cross_val_score\n",
-    "from sklearn.preprocessing import StandardScaler\n",
-    "from sklearn.linear_model import LogisticRegression\n",
-    "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n",
-    "import seaborn as sns\n",
-    "import matplotlib.pyplot as plt\n",
-    "import gradio as gr\n",
-    "import numpy as np\n",
-    "\n",
-    "# Load only the columns the model needs: seven features plus the target.\n",
-    "selected_columns = [\n",
-    "    'duration_mo', 'mos_ethnicity', 'complainant_ethnicity', 'is_force',\n",
-    "    'is_abuse_of_authority', 'is_discourtesy', 'is_offensive_language', 'outcome_description',\n",
-    "]\n",
-    "df = pd.read_csv('my_dataset_logistic.csv', usecols=selected_columns)\n",
-    "\n",
-    "# Quick inspection: column names, first rows, summary statistics, null counts per column.\n",
-    "for summary in (df.columns, df.head(), df.describe(), df.isnull().sum()):\n",
-    "    print(summary)\n",
-    "\n",
-    "# 'outcome_description' is the target; every other column is a feature (missing values become 0).\n",
-    "y = df['outcome_description']\n",
-    "X = df.drop(columns='outcome_description').fillna(0)\n",
-    "\n",
-    "# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)\n",
-    "\n",
-    "# Standardize the features: fit the scaler on the training rows only, then apply it to both splits.\n",
-    "scaler = StandardScaler().fit(X_train)\n",
-    "X_train_scaled = scaler.transform(X_train)\n",
-    "X_test_scaled = scaler.transform(X_test)\n",
-    "\n",
-    "# Train the logistic regression on the scaled training data.\n",
-    "model = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)\n",
-    "\n",
-    "# Predict the held-out rows and report overall accuracy.\n",
-    "y_pred = model.predict(X_test_scaled)\n",
-    "accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)\n",
-    "print(f'Accuracy: {accuracy:.2f}')\n",
-    "\n",
-    "# Per-class precision, recall and F1 on the test set.\n",
-    "print(classification_report(y_test, y_pred))\n",
-    "\n",
-    "# Confusion Matrix: rows are the actual class, columns the predicted class.\n",
-    "# Tick labels use model.classes_ (sorted, and passed as labels= so the matrix follows the same order);\n",
-    "# df['outcome_description'].unique() is in order of first appearance and could label the axes backwards.\n",
-    "conf_matrix = confusion_matrix(y_test, y_pred, labels=model.classes_)\n",
-    "plt.figure(figsize=(8, 6))\n",
-    "ax = sns.heatmap(conf_matrix, annot=True, fmt=\"d\", cmap=\"Blues\", cbar=False, xticklabels=model.classes_, yticklabels=model.classes_)\n",
-    "ax.set(title=\"Confusion Matrix\", xlabel=\"Predicted\", ylabel=\"Actual\")\n",
-    "plt.show()\n",
-    "\n",
-    "# Pairwise correlations between all columns, drawn as an annotated heatmap.\n",
-    "correlation_matrix = df.corr()\n",
-    "plt.figure(figsize=(10, 8))\n",
-    "sns.heatmap(data=correlation_matrix, annot=True, fmt=\".2f\", cmap='coolwarm', linewidths=0.5)\n",
-    "plt.title('Correlation Matrix')\n",
-    "plt.show()\n",
-    "\n",
-    "# Bar chart of each feature's correlation with the target, highest first (the target's own entry is dropped).\n",
-    "target_correlations = correlation_matrix['outcome_description'].sort_values(ascending=False)\n",
-    "feature_correlations = target_correlations.drop(labels='outcome_description')\n",
-    "plt.figure(figsize=(10, 6))\n",
-    "feature_correlations.plot.bar(color='blue')\n",
-    "plt.title('Correlations with Target Variable')\n",
-    "plt.xlabel('Features')\n",
-    "plt.ylabel('Correlation')\n",
-    "plt.show()\n",
-    "\n",
-    "# Standard deviation of every column.\n",
-    "std_dev = df.std()\n",
-    "print('\\nStandard deviation', std_dev, sep='\\n')\n",
-    "\n",
-    "# Gradio implementation\n",
-    "# Ethnicity choices shown in the UI; a choice's position in its list is the integer code passed to the model.\n",
-    "# NOTE(review): confirm these positions match the integer codes used in my_dataset_logistic.csv.\n",
-    "mos_ethnicity_options = [\"Hispanic\", \"White\", \"Black\", \"Asian\", \"American Indian\", \"Other Race\", \"Refused\", \"Unknown\"]\n",
-    "complainant_ethnicity_options = list(mos_ethnicity_options)\n",
-    "\n",
-    "def predict_outcome_duration(mos_ethnicity, complainant_ethnicity, is_force, is_abuse_of_authority, is_discourtesy, is_offensive_language, duration_mo):\n",
-    "    \"\"\"Predict one complaint's outcome: returns \"Arrest\", \"No Arrest\", or \"Error: <message>\" on failure.\"\"\"\n",
-    "    try:\n",
-    "        # Dropdown strings -> list positions; checkbox values -> ints.\n",
-    "        ethnicity_codes = [\n",
-    "            mos_ethnicity_options.index(mos_ethnicity),\n",
-    "            complainant_ethnicity_options.index(complainant_ethnicity),\n",
-    "        ]\n",
-    "        allegation_flags = [int(flag) for flag in (is_force, is_abuse_of_authority, is_discourtesy, is_offensive_language)]\n",
-    "\n",
-    "        # One row, laid out in the column order the scaler was fitted on.\n",
-    "        feature_row = [[duration_mo, *ethnicity_codes, *allegation_flags]]\n",
-    "        predicted_class = model.predict(scaler.transform(feature_row))[0]\n",
-    "    except Exception as e:\n",
-    "        # Any failure is reported to the UI as text instead of raising.\n",
-    "        return f\"Error: {str(e)}\"\n",
-    "\n",
-    "    # Class 1 is shown as \"Arrest\", anything else as \"No Arrest\".\n",
-    "    if predicted_class == 1:\n",
-    "        return \"Arrest\"\n",
-    "    return \"No Arrest\"\n",
-    "\n",
-    "# Input widgets: a dropdown per ethnicity, a checkbox per allegation type, and a slider for the duration in months.\n",
-    "mos_ethnicity_dropdown = gr.Dropdown(choices=mos_ethnicity_options,label=\"Defendant Ethnicity\")\n",
-    "complainant_ethnicity_dropdown = gr.Dropdown(choices=complainant_ethnicity_options, label=\"Complainant Ethnicity\")\n",
-    "is_force_checkbox = gr.Checkbox()\n",
-    "is_abuse_of_authority_checkbox = gr.Checkbox()\n",
-    "is_discourtesy_checkbox = gr.Checkbox()\n",
-    "is_offensive_language_checkbox = gr.Checkbox()\n",
-    "duration_mo_slider = gr.Slider(minimum=0, maximum=20, label=\"Duration in months\")\n",
-    "# Inputs are handed to fn positionally, so the list below must follow predict_outcome_duration's parameter order (mos_ethnicity first, then complainant_ethnicity).\n",
-    "iface = gr.Interface(\n",
-    "    fn=predict_outcome_duration,\n",
-    "    inputs=[mos_ethnicity_dropdown, complainant_ethnicity_dropdown, is_force_checkbox, is_abuse_of_authority_checkbox, is_discourtesy_checkbox, is_offensive_language_checkbox, duration_mo_slider],\n",
-    "    outputs=\"text\",\n",
-    "    live=True,\n",
-    "    title=\"Complaint Outcome Prediction\"\n",
-    ")\n",
-    "\n",
-    "# Launch the Gradio interface; share=True also creates a temporary public gradio.live link.\n",
-    "iface.launch(share=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "outputs": [],
-   "source": [],
-   "metadata": {
-    "collapsed": false
-   }
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}