Mario Faúndez Vidal committed on
Commit
f1f6f45
·
1 Parent(s): b197211

Update project configuration and remove requirements.in

Browse files
.gitignore CHANGED
@@ -128,3 +128,5 @@ dmypy.json
128
  # Pyre type checker
129
  .pyre/
130
  flagged/
 
 
 
128
  # Pyre type checker
129
  .pyre/
130
  flagged/
131
+
132
+ .ruff_cache/
Makefile CHANGED
@@ -1,24 +1,82 @@
1
- SHELL=/bin/sh
2
- export PATH := ./venv/bin:$(PATH)
3
- .PHONY: help
4
- help: ## This help.
5
- @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " \033[36m%-20s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
6
 
 
 
 
 
7
  .DEFAULT_GOAL := help
8
 
9
- venv:
10
- touch requirements.txt ;\
11
- test -d venv || virtualenv --python=$$PYTHON3 venv
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- pip-compile: venv
14
- python -m pip install --upgrade pip;\
15
- pip install pip-tools;\
16
- touch requirements.in ;\
17
- pip-compile --output-file requirements.txt requirements.in;\
18
- pip install -r requirements.txt
19
 
20
- autopep8:
21
- autopep8 -i *.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  clean:
24
- rm -fr venv
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .PHONY: help install dev clean test format lint status run notebook docker-build docker-run
 
 
 
 
2
 
3
+ IMAGE_NAME := classify-text-with-bert-hate-speech
4
+ IMAGE_TAG := local
5
+
6
+ # Default target - show help
7
  .DEFAULT_GOAL := help
8
 
9
+ .ONESHELL:
10
+
11
+ help:
12
+ @echo "Project - Available Commands:"
13
+ @echo ""
14
+ @echo " make install - Install production dependencies with uv"
15
+ @echo " make dev - Install development dependencies with uv"
16
+ @echo " make run - Run the application (app.py)"
17
+ @echo " make notebook - Launch jupyter notebook/lab"
18
+ @echo " make test - Run tests (requires pytest)"
19
+ @echo " make format - Format code (ruff or black if available)"
20
+ @echo " make lint - Run linter (ruff if available)"
21
+ @echo " make clean - Remove venv and build/test artifacts"
22
+ @echo " make status - Show python version and installed packages summary"
23
+ @echo " make docker-build- Build a local docker image"
24
+ @echo " make docker-run - Run the docker image locally"
25
+ @echo ""
26
+
27
+ install:
28
+ @echo "📦 Installing production dependencies with uv..."
29
+ uv sync --no-dev
30
+ @echo "✅ Production dependencies installed successfully!"
31
+
32
+ dev:
33
+ @echo "📦 Installing development dependencies with uv..."
34
+ uv sync --dev
35
+ @echo "✅ Development dependencies installed successfully!"
36
+
37
+ run:
38
+ @echo "Running app.py..."
39
+ uv run python app.py
40
 
41
+ notebook:
42
+ @echo "Launching Jupyter Notebook (or lab if available)..."
43
+ uv run jupyter lab || uv run jupyter notebook
 
 
 
44
 
45
+ test:
46
+ @echo "Running tests with pytest..."
47
+ uv run pytest -q tests || true
48
+
49
+ format:
50
+ @echo "Formatting code with ruff..."
51
+ uv run ruff format . || true
52
+ -uv run ruff check --fix . || true
53
+
54
+ lint:
55
+ @echo "Running linter with ruff..."
56
+ uv run ruff check .
57
+
58
+ status:
59
+ @uv run python --version
60
+ @echo "Installed packages:"
61
+ @uv pip list
62
 
63
  clean:
64
+ @echo "🧹 Cleaning up build and Python artifacts..."
65
+ -rm -rf .venv build/ dist/ *.egg-info .eggs
66
+ -find . -type d -name "__pycache__" -exec rm -rf {} +
67
+ -find . -type f -name "*.py[co]" -delete
68
+ -find . -type f -name ".coverage" -delete
69
+ -find . -type d -name ".pytest_cache" -exec rm -rf {} +
70
+ -find . -type d -name ".ruff_cache" -exec rm -rf {} +
71
+ -find . -type d -name ".mypy_cache" -exec rm -rf {} +
72
+ -find . -type d -name "htmlcov" -exec rm -rf {} +
73
+ -find . -type f -name ".DS_Store" -delete
74
+ @echo "✅ Cleanup completed!"
75
+
76
+ docker-build:
77
+ @echo "Building docker image ${IMAGE_NAME}:${IMAGE_TAG} (requires Docker)"
78
+ @docker build -t ${IMAGE_NAME}:${IMAGE_TAG} . || true
79
+
80
+ docker-run:
81
+ @echo "Running docker image ${IMAGE_NAME}:${IMAGE_TAG} (port 7860 forwarded)"
82
+ @docker run --rm -p 7860:7860 ${IMAGE_NAME}:${IMAGE_TAG}
hate_speech_bert_bert_mlp_in_tensorflow.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
hate_speech_run.ipynb CHANGED
@@ -1,134 +1,133 @@
1
  {
2
- "nbformat": 4,
3
- "nbformat_minor": 0,
4
- "metadata": {
5
- "colab": {
6
- "provenance": []
7
- },
8
- "kernelspec": {
9
- "name": "python3",
10
- "display_name": "Python 3"
11
- },
12
- "language_info": {
13
- "name": "python"
14
- },
15
- "gpuClass": "standard"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  },
17
- "cells": [
18
- {
19
- "cell_type": "markdown",
20
- "source": [
21
- "# Implementation of text classification with BERT\n",
22
- "\n",
23
- "\n",
24
- "Still Working on it.\n",
25
- "\n",
26
- "This notebook is based on this TensorFlow tutorial: [Classify text with BERT](https://www.tensorflow.org/tutorials/text/classify_text_with_bert)\n",
27
- "\n",
28
- "BERT [(article link)](https://arxiv.org/abs/1810.04805) and other Transformer encoder architectures have been wildly successful on a variety of tasks in NLP (natural language processing). They compute vector-space representations of natural language that are suitable for use in deep learning models.\n",
29
- "\n",
30
- "![](http://www.d2l.ai/_images/nlp-map-pretrain.svg):\n",
31
- "\n",
32
- "Source: http://www.d2l.ai/chapter_natural-language-processing-pretraining/index.html\n",
33
- "\n",
34
- "BERT models are usually pre-trained on a large corpus of text, then fine-tuned for specific tasks.\n",
35
- "\n",
36
- "In this notebook, I am going to use a pretrained BERT to compute vector-space representations of a hate speech dataset to feed two different downstream architectures (CNN and MLP).\n",
37
- "\n",
38
- "Sentiment Analysis\n",
39
- "\n",
40
- "This notebook trains a sentiment analysis model to classify the [Hate Speech and Offensive Language Dataset]( https://www.kaggle.com/mrmorj/hate-speech-and-offensive-language-dataset) tweets in three classes:\n",
41
- " \n",
42
- "* 0 - hate speech \n",
43
- "* 1 - offensive language \n",
44
- "* 2 - neither as positive or negative"
45
- ],
46
- "metadata": {
47
- "id": "Jh_WkIs1iJDs"
48
- }
49
  },
 
 
 
 
50
  {
51
- "cell_type": "code",
52
- "execution_count": null,
53
- "metadata": {
54
- "id": "Ltc_HOzjX87s"
55
- },
56
- "outputs": [],
57
- "source": [
58
- "!pip install -q tensorflow-text > /dev/null\n",
59
- "!pip install -q tf-models-official > /dev/null"
60
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  },
 
 
 
 
62
  {
63
- "cell_type": "code",
64
- "source": [
65
- "from official.nlp.optimization import AdamWeightDecay, WarmUp\n",
66
- "import tensorflow as tf\n",
67
- "import tensorflow_hub as hub\n",
68
- "import tensorflow_text as text\n",
69
- "import numpy as np\n",
70
- "np.set_printoptions(suppress=True)\n",
71
- "\n",
72
- "\n",
73
- "# Carga el modelo\n",
74
- "with tf.keras.utils.custom_object_scope({'AdamWeightDecay': AdamWeightDecay, 'WarmUp': WarmUp}):\n",
75
- " classifier_model = tf.keras.models.load_model('classifier_model.h5', \n",
76
- " custom_objects={'KerasLayer': hub.KerasLayer})"
77
- ],
78
- "metadata": {
79
- "colab": {
80
- "base_uri": "https://localhost:8080/"
81
- },
82
- "id": "EQlIdYjKYAnn",
83
- "outputId": "efb1f87f-8b45-4201-ac14-2abdc74b8cfd"
84
- },
85
- "execution_count": null,
86
- "outputs": [
87
- {
88
- "output_type": "stream",
89
- "name": "stderr",
90
- "text": [
91
- "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n",
92
- "WARNING:tensorflow:From /usr/local/lib/python3.9/dist-packages/tensorflow/python/autograph/pyct/static_analysis/liveness.py:83: Analyzer.lamba_check (from tensorflow.python.autograph.pyct.static_analysis.liveness) is deprecated and will be removed after 2023-09-23.\n",
93
- "Instructions for updating:\n",
94
- "Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089\n",
95
- "WARNING:tensorflow:Error in loading the saved optimizer state. As a result, your model is starting with a freshly initialized optimizer.\n"
96
- ]
97
- }
98
- ]
99
  },
100
  {
101
- "cell_type": "code",
102
- "source": [
103
- "classifier_model.predict([\"LEETSSS GOOO Get those ... outta here!!!!!!\"])"
104
- ],
105
- "metadata": {
106
- "colab": {
107
- "base_uri": "https://localhost:8080/"
108
- },
109
- "id": "6Ma3P-7iYEbA",
110
- "outputId": "2e6fbc37-6ab8-4035-c706-f8d6d3d8c7ba"
111
- },
112
- "execution_count": null,
113
- "outputs": [
114
- {
115
- "output_type": "stream",
116
- "name": "stdout",
117
- "text": [
118
- "1/1 [==============================] - 1s 1s/step\n"
119
- ]
120
- },
121
- {
122
- "output_type": "execute_result",
123
- "data": {
124
- "text/plain": [
125
- "array([[0.99998355, 0.00001638, 0.00000017]], dtype=float32)"
126
- ]
127
- },
128
- "metadata": {},
129
- "execution_count": 4
130
- }
131
  ]
 
 
 
 
132
  }
133
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  }
 
1
  {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "Jh_WkIs1iJDs"
7
+ },
8
+ "source": [
9
+ "# Implementation of text classification with BERT\n",
10
+ "\n",
11
+ "\n",
12
+ "Still Working on it.\n",
13
+ "\n",
14
+ "This notebook is based on this TensorFlow tutorial: [Classify text with BERT](https://www.tensorflow.org/tutorials/text/classify_text_with_bert)\n",
15
+ "\n",
16
+ "BERT [(article link)](https://arxiv.org/abs/1810.04805) and other Transformer encoder architectures have been wildly successful on a variety of tasks in NLP (natural language processing). They compute vector-space representations of natural language that are suitable for use in deep learning models.\n",
17
+ "\n",
18
+ "![](http://www.d2l.ai/_images/nlp-map-pretrain.svg):\n",
19
+ "\n",
20
+ "Source: http://www.d2l.ai/chapter_natural-language-processing-pretraining/index.html\n",
21
+ "\n",
22
+ "BERT models are usually pre-trained on a large corpus of text, then fine-tuned for specific tasks.\n",
23
+ "\n",
24
+ "In this notebook, I am going to use a pretrained BERT to compute vector-space representations of a hate speech dataset to feed two different downstream architectures (CNN and MLP).\n",
25
+ "\n",
26
+ "Sentiment Analysis\n",
27
+ "\n",
28
+ "This notebook trains a sentiment analysis model to classify the [Hate Speech and Offensive Language Dataset]( https://www.kaggle.com/mrmorj/hate-speech-and-offensive-language-dataset) tweets in three classes:\n",
29
+ " \n",
30
+ "* 0 - hate speech \n",
31
+ "* 1 - offensive language \n",
32
+ "* 2 - neither as positive or negative"
33
+ ]
34
  },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": null,
38
+ "metadata": {
39
+ "id": "Ltc_HOzjX87s"
40
+ },
41
+ "outputs": [],
42
+ "source": [
43
+ "!pip install -q tensorflow-text > /dev/null\n",
44
+ "!pip install -q tf-models-official > /dev/null"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "execution_count": null,
50
+ "metadata": {
51
+ "colab": {
52
+ "base_uri": "https://localhost:8080/"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  },
54
+ "id": "EQlIdYjKYAnn",
55
+ "outputId": "efb1f87f-8b45-4201-ac14-2abdc74b8cfd"
56
+ },
57
+ "outputs": [
58
  {
59
+ "name": "stderr",
60
+ "output_type": "stream",
61
+ "text": [
62
+ "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n",
63
+ "WARNING:tensorflow:From /usr/local/lib/python3.9/dist-packages/tensorflow/python/autograph/pyct/static_analysis/liveness.py:83: Analyzer.lamba_check (from tensorflow.python.autograph.pyct.static_analysis.liveness) is deprecated and will be removed after 2023-09-23.\n",
64
+ "Instructions for updating:\n",
65
+ "Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089\n",
66
+ "WARNING:tensorflow:Error in loading the saved optimizer state. As a result, your model is starting with a freshly initialized optimizer.\n"
67
+ ]
68
+ }
69
+ ],
70
+ "source": [
71
+ "import numpy as np\n",
72
+ "import tensorflow as tf\n",
73
+ "import tensorflow_hub as hub\n",
74
+ "from official.nlp.optimization import AdamWeightDecay, WarmUp\n",
75
+ "\n",
76
+ "np.set_printoptions(suppress=True)\n",
77
+ "\n",
78
+ "\n",
79
+ "# Carga el modelo\n",
80
+ "with tf.keras.utils.custom_object_scope({\"AdamWeightDecay\": AdamWeightDecay, \"WarmUp\": WarmUp}):\n",
81
+ " classifier_model = tf.keras.models.load_model(\"classifier_model.h5\", custom_objects={\"KerasLayer\": hub.KerasLayer})"
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "execution_count": null,
87
+ "metadata": {
88
+ "colab": {
89
+ "base_uri": "https://localhost:8080/"
90
  },
91
+ "id": "6Ma3P-7iYEbA",
92
+ "outputId": "2e6fbc37-6ab8-4035-c706-f8d6d3d8c7ba"
93
+ },
94
+ "outputs": [
95
  {
96
+ "name": "stdout",
97
+ "output_type": "stream",
98
+ "text": [
99
+ "1/1 [==============================] - 1s 1s/step\n"
100
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  },
102
  {
103
+ "data": {
104
+ "text/plain": [
105
+ "array([[0.99998355, 0.00001638, 0.00000017]], dtype=float32)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  ]
107
+ },
108
+ "execution_count": 4,
109
+ "metadata": {},
110
+ "output_type": "execute_result"
111
  }
112
+ ],
113
+ "source": [
114
+ "classifier_model.predict([\"LEETSSS GOOO Get those ... outta here!!!!!!\"])"
115
+ ]
116
+ }
117
+ ],
118
+ "metadata": {
119
+ "colab": {
120
+ "provenance": []
121
+ },
122
+ "gpuClass": "standard",
123
+ "kernelspec": {
124
+ "display_name": "Python 3",
125
+ "name": "python3"
126
+ },
127
+ "language_info": {
128
+ "name": "python"
129
+ }
130
+ },
131
+ "nbformat": 4,
132
+ "nbformat_minor": 0
133
  }
requirements.in DELETED
@@ -1,3 +0,0 @@
1
- gradio
2
- tensorflow-text
3
- tf-models-official
 
 
 
 
uv.lock ADDED
The diff for this file is too large to render. See raw diff