Spaces:

Ordenador
/

classify-text-with-bert-hate-speech

Sleeping

App Files Files Community

Mario Faúndez Vidal committed on Oct 10, 2025

Commit

f1f6f45

1 Parent(s): b197211

Update project configuration and remove requirements.in

Browse files

Files changed (6) hide show

.gitignore +2 -0
Makefile +75 -17
hate_speech_bert_bert_mlp_in_tensorflow.ipynb +0 -0
hate_speech_run.ipynb +122 -123
requirements.in +0 -3
uv.lock +0 -0

.gitignore CHANGED Viewed

@@ -128,3 +128,5 @@ dmypy.json
 # Pyre type checker
 .pyre/
 flagged/

 # Pyre type checker
 .pyre/
 flagged/
+.ruff_cache/

Makefile CHANGED Viewed

@@ -1,24 +1,82 @@
-SHELL=/bin/sh
-export PATH := ./venv/bin:$(PATH)
-.PHONY: help
-help: ## This help.
-	@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " \033[36m%-20s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
 .DEFAULT_GOAL := help
-venv:
-	touch requirements.txt ;\
-	test -d venv || virtualenv --python=$$PYTHON3 venv
-pip-compile: venv
-	python -m pip install --upgrade pip;\
-	pip install pip-tools;\
-	touch requirements.in ;\
-	pip-compile --output-file requirements.txt requirements.in;\
-	pip install -r requirements.txt
-autopep8:
-	autopep8 -i *.py
 clean:
-	rm -fr venv

+# Every target in this file names a command rather than a file it produces,
+# so all of them are declared phony.
+.PHONY: help install dev run notebook test format lint status clean \
+        docker-build docker-run
+
+# Name and tag used by the docker-build and docker-run targets.
+IMAGE_TAG := local
+IMAGE_NAME := classify-text-with-bert-hate-speech
+
+# A bare `make` prints the command overview.
 .DEFAULT_GOAL := help
+# Run each recipe as a single shell script instead of one shell per line.
+.ONESHELL:
+# help: print every available target with a one-line description.
+# The target name is padded to 12 columns and followed by a separate " - ",
+# so the longest name (docker-build) no longer runs into the separator.
+# Continuation lines must not start with '-', '@' or '+': under .ONESHELL
+# make strips those characters from the start of interior recipe lines.
+help:
+	@echo "Project - Available Commands:"
+	@echo ""
+	@printf '  make %-12s - %s\n' \
+		install "Install production dependencies with uv" \
+		dev "Install development dependencies with uv" \
+		run "Run the application (app.py)" \
+		notebook "Launch jupyter notebook/lab" \
+		test "Run tests (requires pytest)" \
+		format "Format code (ruff or black if available)" \
+		lint "Run linter (ruff if available)" \
+		clean "Remove venv and build/test artifacts" \
+		status "Show python version and installed packages summary" \
+		docker-build "Build a local docker image" \
+		docker-run "Run the docker image locally"
+	@echo ""
+# install: run `uv sync --no-dev` to install the production dependencies.
+# This file enables .ONESHELL, so the recipe runs as one script whose exit
+# status is that of its last command. `set -e` makes a failing `uv sync`
+# abort the recipe instead of being followed by the success message.
+install:
+	@set -e
+	@echo "📦 Installing production dependencies with uv..."
+	uv sync --no-dev
+	@echo "✅ Production dependencies installed successfully!"
+# dev: run `uv sync --dev` to install the development dependencies.
+# This file enables .ONESHELL, so the recipe runs as one script whose exit
+# status is that of its last command. `set -e` makes a failing `uv sync`
+# abort the recipe instead of being followed by the success message.
+dev:
+	@set -e
+	@echo "📦 Installing development dependencies with uv..."
+	uv sync --dev
+	@echo "✅ Development dependencies installed successfully!"
+# run: announce the launch, then execute app.py through `uv run`.
+run:
+	@printf '%s\n' "Running app.py..."
+	uv run python app.py
+# notebook: try JupyterLab first; if that command exits non-zero, start the
+# classic notebook server instead.
+notebook:
+	@printf '%s\n' "Launching Jupyter Notebook (or lab if available)..."
+	if ! uv run jupyter lab; then uv run jupyter notebook; fi
+# test: run pytest quietly against the tests/ directory through `uv run`.
+# pytest is the last command of the recipe, so its exit status becomes the
+# status of `make test`; a failing suite is no longer masked by `|| true`.
+test:
+	@echo "Running tests with pytest..."
+	uv run pytest -q tests
+# format: best-effort formatting pass with ruff, followed by auto-fixing of
+# lint findings. Both steps are allowed to fail without failing the target.
+format:
+	@printf '%s\n' "Formatting code with ruff..."
+	uv run ruff format . || :
+	uv run ruff check --fix . || :
+# lint: run `ruff check` over the whole project; its exit status is the
+# result of the target.
+lint:
+	@printf '%s\n' "Running linter with ruff..."
+	uv run ruff check .
+# status: show the interpreter version reported through uv, then the list
+# of installed packages.
+status:
+	@uv run python --version
+	@printf '%s\n' "Installed packages:"
+	@uv pip list
 clean:
+	@echo "🧹 Cleaning up build and Python artifacts..."
+	-rm -rf .venv build/ dist/ *.egg-info .eggs
+	-find . -type d -name "__pycache__" -exec rm -rf {} +
+	-find . -type f -name "*.py[co]" -delete
+	-find . -type f -name ".coverage" -delete
+	-find . -type d -name ".pytest_cache" -exec rm -rf {} +
+	-find . -type d -name ".ruff_cache" -exec rm -rf {} +
+	-find . -type d -name ".mypy_cache" -exec rm -rf {} +
+	-find . -type d -name "htmlcov" -exec rm -rf {} +
+	-find . -type f -name ".DS_Store" -delete
+	@echo "✅ Cleanup completed!"
+# docker-build: build the image $(IMAGE_NAME):$(IMAGE_TAG) from the current
+# directory. The build is the last command of the recipe, so a failed
+# `docker build` now fails the target instead of being hidden by `|| true`.
+docker-build:
+	@echo "Building docker image $(IMAGE_NAME):$(IMAGE_TAG) (requires Docker)"
+	@docker build -t $(IMAGE_NAME):$(IMAGE_TAG) .
+# docker-run: start a throwaway container (--rm) from the image
+# $(IMAGE_NAME):$(IMAGE_TAG), publishing container port 7860 on host port 7860.
+docker-run:
+	@echo "Running docker image $(IMAGE_NAME):$(IMAGE_TAG) (port 7860 forwarded)"
+	@docker run -p 7860:7860 --rm $(IMAGE_NAME):$(IMAGE_TAG)

hate_speech_bert_bert_mlp_in_tensorflow.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

hate_speech_run.ipynb CHANGED Viewed

@@ -1,134 +1,133 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "provenance": []
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    },
-    "gpuClass": "standard"
   },
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "source": [
-        "# Implementation of text classification with BERT\n",
-        "\n",
-        "\n",
-        "Still Working on it.\n",
-        "\n",
-        "This notebook is based in this TensorFlow tutorial: [Classify text with BERT](https://www.tensorflow.org/tutorials/text/classify_text_wibert)\n",
-        "\n",
-        "BERT [(article link)](https://arxiv.org/abs/1810.04805) and other Transformer encoder architectures have been wildly successful on a variety of tasks in NLP (natural language processing). They compute vector-space representations of natural language that are suitable for use in deep learning models.\n",
-        "\n",
-        "![](http://www.d2l.ai/_images/nlp-map-pretrain.svg):\n",
-        "\n",
-        "Source: http://www.d2l.ai/chapter_natural-language-processing-pretraining/index.html\n",
-        "\n",
-        "BERT models are usually pre-trained on a large corpus of text, then fine-tuned for specific tasks.\n",
-        "\n",
-        "In this notebook, I am going to use a pretreined BERT to compute vector-space representations of a hate speech dataset to feed two different downsteam Archtectures (CNN and MLP).\n",
-        "\n",
-        "Sentiment Analysis\n",
-        "\n",
-        "This notebook trains a sentiment analysis model to classify the [Hate Speech and Offensive Language Dataset]( https://www.kaggle.com/mrmorj/hate-speech-and-offensive-language-dataset) tweets in three classes:\n",
-        " \n",
-        "* 0 - hate speech \n",
-        "* 1 - offensive language \n",
-        "* 2 - neither as positive or negative"
-      ],
-      "metadata": {
-        "id": "Jh_WkIs1iJDs"
-      }
     },
     {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "Ltc_HOzjX87s"
-      },
-      "outputs": [],
-      "source": [
-        "!pip install -q tensorflow-text > /dev/null\n",
-        "!pip install -q tf-models-official > /dev/null"
-      ]
     },
     {
-      "cell_type": "code",
-      "source": [
-        "from official.nlp.optimization import AdamWeightDecay, WarmUp\n",
-        "import tensorflow as tf\n",
-        "import tensorflow_hub as hub\n",
-        "import tensorflow_text as text\n",
-        "import numpy as np\n",
-        "np.set_printoptions(suppress=True)\n",
-        "\n",
-        "\n",
-        "# Carga el modelo\n",
-        "with tf.keras.utils.custom_object_scope({'AdamWeightDecay': AdamWeightDecay, 'WarmUp': WarmUp}):\n",
-        "    classifier_model = tf.keras.models.load_model('classifier_model.h5', \n",
-        "                                                  custom_objects={'KerasLayer': hub.KerasLayer})"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "EQlIdYjKYAnn",
-        "outputId": "efb1f87f-8b45-4201-ac14-2abdc74b8cfd"
-      },
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n",
-            "WARNING:tensorflow:From /usr/local/lib/python3.9/dist-packages/tensorflow/python/autograph/pyct/static_analysis/liveness.py:83: Analyzer.lamba_check (from tensorflow.python.autograph.pyct.static_analysis.liveness) is deprecated and will be removed after 2023-09-23.\n",
-            "Instructions for updating:\n",
-            "Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089\n",
-            "WARNING:tensorflow:Error in loading the saved optimizer state. As a result, your model is starting with a freshly initialized optimizer.\n"
-          ]
-        }
-      ]
     },
     {
-      "cell_type": "code",
-      "source": [
-        "classifier_model.predict([\"LEETSSS GOOO Get those ... outta here!!!!!!\"])"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "6Ma3P-7iYEbA",
-        "outputId": "2e6fbc37-6ab8-4035-c706-f8d6d3d8c7ba"
-      },
-      "execution_count": null,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "1/1 [==============================] - 1s 1s/step\n"
-          ]
-        },
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "array([[0.99998355, 0.00001638, 0.00000017]], dtype=float32)"
-            ]
-          },
-          "metadata": {},
-          "execution_count": 4
-        }
       ]
     }
-  ]
 }

 {
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Jh_WkIs1iJDs"
+   },
+   "source": [
+    "# Implementation of text classification with BERT\n",
+    "\n",
+    "\n",
+    "Still Working on it.\n",
+    "\n",
+    "This notebook is based on this TensorFlow tutorial: [Classify text with BERT](https://www.tensorflow.org/text/tutorials/classify_text_with_bert)\n",
+    "\n",
+    "BERT [(article link)](https://arxiv.org/abs/1810.04805) and other Transformer encoder architectures have been wildly successful on a variety of tasks in NLP (natural language processing). They compute vector-space representations of natural language that are suitable for use in deep learning models.\n",
+    "\n",
+    "![](http://www.d2l.ai/_images/nlp-map-pretrain.svg):\n",
+    "\n",
+    "Source: http://www.d2l.ai/chapter_natural-language-processing-pretraining/index.html\n",
+    "\n",
+    "BERT models are usually pre-trained on a large corpus of text, then fine-tuned for specific tasks.\n",
+    "\n",
+    "In this notebook, I am going to use a pre-trained BERT to compute vector-space representations of a hate speech dataset to feed two different downstream architectures (CNN and MLP).\n",
+    "\n",
+    "Sentiment Analysis\n",
+    "\n",
+    "This notebook trains a sentiment analysis model to classify the [Hate Speech and Offensive Language Dataset]( https://www.kaggle.com/mrmorj/hate-speech-and-offensive-language-dataset) tweets in three classes:\n",
+    " \n",
+    "* 0 - hate speech \n",
+    "* 1 - offensive language \n",
+    "* 2 - neither as positive or negative"
+   ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "Ltc_HOzjX87s"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install -q tensorflow-text > /dev/null\n",
+    "!pip install -q tf-models-official > /dev/null"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "EQlIdYjKYAnn",
+    "outputId": "efb1f87f-8b45-4201-ac14-2abdc74b8cfd"
+   },
+   "outputs": [
     {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n",
+      "WARNING:tensorflow:From /usr/local/lib/python3.9/dist-packages/tensorflow/python/autograph/pyct/static_analysis/liveness.py:83: Analyzer.lamba_check (from tensorflow.python.autograph.pyct.static_analysis.liveness) is deprecated and will be removed after 2023-09-23.\n",
+      "Instructions for updating:\n",
+      "Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089\n",
+      "WARNING:tensorflow:Error in loading the saved optimizer state. As a result, your model is starting with a freshly initialized optimizer.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "import tensorflow as tf\n",
+    "import tensorflow_hub as hub\n",
+    "from official.nlp.optimization import AdamWeightDecay, WarmUp\n",
+    "\n",
+    "np.set_printoptions(suppress=True)\n",
+    "\n",
+    "\n",
+    "# Carga el modelo\n",
+    "with tf.keras.utils.custom_object_scope({\"AdamWeightDecay\": AdamWeightDecay, \"WarmUp\": WarmUp}):\n",
+    "    classifier_model = tf.keras.models.load_model(\"classifier_model.h5\", custom_objects={\"KerasLayer\": hub.KerasLayer})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
+    "id": "6Ma3P-7iYEbA",
+    "outputId": "2e6fbc37-6ab8-4035-c706-f8d6d3d8c7ba"
+   },
+   "outputs": [
     {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1/1 [==============================] - 1s 1s/step\n"
+     ]
     },
     {
+     "data": {
+      "text/plain": [
+       "array([[0.99998355, 0.00001638, 0.00000017]], dtype=float32)"
       ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
     }
+   ],
+   "source": [
+    "classifier_model.predict([\"LEETSSS GOOO Get those ... outta here!!!!!!\"])"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "gpuClass": "standard",
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
 }

requirements.in DELETED Viewed

@@ -1,3 +0,0 @@
-gradio
-tensorflow-text
-tf-models-official

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff