Davy592 committed on
Commit
79c84e2
·
1 Parent(s): 874b275

First commit

Browse files
.dockerignore ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .git
2
+ .gitignore
3
+ .gitattributes
4
+ README.md
5
+ .env
6
+ .env.local
7
+ __pycache__
8
+ *.pyc
9
+ *.pyo
10
+ *.pyd
11
+ .Python
12
+ env/
13
+ venv/
14
+ .venv
15
+ *.egg-info/
16
+ dist/
17
+ build/
18
+ .pytest_cache/
19
+ .coverage
20
+ htmlcov/
21
+ *.egg
22
+ .vscode/
23
+ .idea/
24
+ *.swp
25
+ *.swo
26
+ *~
27
+ .DS_Store
Dockerfile ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.12-slim

# Install system dependencies as root
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Create a non-root user
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Cache busting: build with `--build-arg BUILD_DATE=<value>` to invalidate the
# layers from here on. LABEL performs no shell command substitution, so a value
# such as "$(date)" would be stored literally; the date must come from the build
# argument instead. Declaring the ARG this late keeps the apt/user layers cached.
ARG BUILD_DATE
LABEL build_date="${BUILD_DATE}"
RUN echo "Build date: ${BUILD_DATE}"

# Copy source code and dependency files FIRST (needed by flit)
COPY --chown=user pyproject.toml ./
COPY --chown=user nygaardcodecommentclassification ./nygaardcodecommentclassification
COPY --chown=user app.py ./

# Install Python dependencies
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -e ".[frontend]"

# Expose port
EXPOSE 7860

# Run gradio on port 7860
CMD ["python", "app.py"]
README.md CHANGED
@@ -1,10 +1,39 @@
1
  ---
2
  title: NygaardCodeComment Frontend
3
- emoji: 🔥
4
- colorFrom: yellow
5
- colorTo: blue
6
  sdk: docker
 
 
 
 
 
 
7
  pinned: false
8
  ---
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: NygaardCodeComment Frontend
3
+ emoji: 🏆
4
+ colorFrom: blue
5
+ colorTo: red
6
  sdk: docker
7
+ docker:
8
+ dockerfile: ./Dockerfile
9
+ port: 7860
10
+ env:
11
+ - name: API_URL
12
+ default: https://se4ai2526-uniba-nygaard-nygaardcodecomment-backend.hf.space/
13
  pinned: false
14
  ---
15
 
16
+ # 🔍 Nygaard Code Comment Classifier - Frontend
17
+
18
+ This is the frontend interface for the Nygaard Code Comment Classification system, built with Gradio.
19
+
20
+ ## Overview
21
+
22
+ This Gradio application provides a user-friendly interface to classify code comments into multiple categories across different programming languages (Java, Python, Pharo).
23
+
24
+ ## Deployment on Hugging Face Spaces
25
+
26
+ The application is configured to run on Hugging Face Spaces using Docker. The frontend connects to the backend API hosted at `https://se4ai2526-uniba-nygaard-nygaardcodecomment-backend.hf.space/`.
27
+
28
+ ### Configuration
29
+
30
+ - **SDK**: Docker
31
+ - **Port**: 7860
32
+ - **Python Version**: 3.12
33
+ - **Backend API**: https://se4ai2526-uniba-nygaard-nygaardcodecomment-backend.hf.space/
34
+
35
+ ### Environment Variables
36
+
37
+ - `API_URL`: URL of the backend API (default: https://se4ai2526-uniba-nygaard-nygaardcodecomment-backend.hf.space/)
38
+
39
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Gradio Frontend Application Entry Point for Hugging Face Spaces.

This file serves as the entry point for the Gradio application when deployed
on Hugging Face Spaces. It imports the prebuilt ``gradio_app`` object from the
package and launches it when executed as a script.
"""

from nygaardcodecommentclassification.api.frontend import gradio_app

if __name__ == "__main__":
    # Launch the Gradio app.
    # Host and port are fixed here rather than detected automatically: the server
    # listens on all interfaces ("0.0.0.0") on port 7860.
    gradio_app.launch(server_name="0.0.0.0", server_port=7860)
nygaardcodecommentclassification/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Nygaard Code Comment Classification package.
2
+
3
+ This package provides utilities to download, clean, featurize, train, and
4
+ evaluate models for code comment classification across multiple languages
5
+ (`java`, `python`, and `pharo`).
6
+
7
+ Available submodules
8
+ --------------------
9
+ - `config`: Project paths, constants, and logging setup.
10
+ - `dataset`: Dataset download and cleaning routines.
11
+ - `features`: Feature extraction (embeddings and labels) utilities.
12
+ - `plots`: Basic plotting CLI scaffold.
13
+ - `modeling.train`: Training entry points (CatBoost and SetFit).
14
+ - `modeling.predict`: Evaluation and MLflow logging utilities.
15
+ """
nygaardcodecommentclassification/api/frontend.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gradio Frontend Application.
2
+
3
+ This module provides a web-based user interface for the code comment classification API.
4
+ Users can input code comments and get real-time predictions through an interactive interface.
5
+ """
6
+
7
+ import os
8
+
9
+ import gradio as gr
10
+ import requests
11
+
12
# Base URL of the backend API. Taken from the API_URL environment variable when it
# is set; otherwise falls back to the deployed backend Space below.
API_URL = os.getenv("API_URL", "https://se4ai2526-uniba-nygaard-nygaardcodecomment-backend.hf.space/")
14
+
15
+
16
def get_categories_for_language(language: str) -> str:
    """Describe the comment categories known for one programming language.

    Args:
        language: Name of the language exactly as keyed in the table below
            ("Java", "Python" or "Pharo")

    Returns:
        Markdown text made of a bold heading followed by one bullet per category,
        or a "Not available" notice when the language has no entry
    """
    supported = {
        "Java": [
            "summary: A brief description of the behavior of the code",
            "Ownership: Describes the authors and ownership",
            "Expand: Aims to describe the associated code",
            "usage: Describes how to use the code",
            "Pointer: Points to related code or resources",
            "deprecation: Indicates deprecated code",
            "rational: Explains the reasoning behind the implementation",
        ],
        "Python": [
            "Usage: Describes usage of the code",
            "Parameters: Documents function/method parameters",
            "DevelopmentNotes: Contains notes for developers",
            "Expand: Provides detailed explanations",
            "Summary: Summarizes the functionality",
        ],
        "Pharo": [
            "Keyimplementationpoints: Highlights key implementation details",
            "Example: Provides code examples",
            "Responsibilities: Describes object responsibilities",
            "Intent: Explains the intent or purpose",
            "Keymessages: Documents key messages or methods",
            "Collaborators: Lists collaborating objects/classes",
        ],
    }

    entries = supported.get(language, [])
    # Guard clause: unknown languages get the generic notice.
    if not entries:
        return "**Supported Categories:** Not available"

    heading = f"**Supported Categories for {language}:**\n"
    bullets = [f"- {entry}" for entry in entries]
    return heading + "\n".join(bullets)
58
+
59
+
60
def _message_box(start_color: str, end_color: str, inner_html: str) -> str:
    """Wrap HTML in the gradient banner used for status and error messages.

    Args:
        start_color: CSS colour at the start of the 135-degree gradient
        end_color: CSS colour at the end of the gradient
        inner_html: HTML placed inside the banner; callers must escape any
            untrusted text before passing it in

    Returns:
        HTML string for a centred, bold, white-on-gradient message box
    """
    return f"""
    <div style="background: linear-gradient(135deg, {start_color} 0%, {end_color} 100%);
                border-radius: 10px; padding: 20px; margin: 10px 0;
                color: white; text-align: center; font-weight: bold;">
        {inner_html}
    </div>
    """


def predict_gradio(text: str, language: str) -> str:
    """Gradio interface function for single text prediction.

    Posts the comment to the backend ``/predict`` endpoint (CatBoost model) and
    renders the outcome as an HTML banner. Every failure mode is reported as HTML
    rather than raised, so the UI always has something to display.

    Args:
        text: The code comment to classify; empty, whitespace-only or ``None``
            input yields a prompt to enter a comment
        language: The programming language; sent to the API lower-cased

    Returns:
        Formatted HTML string with the predicted labels or an error/status message
    """
    # Function-scope import: used to escape backend/exception text before it is
    # embedded in HTML that gr.HTML renders verbatim.
    import html

    if not text or not text.strip():
        return _message_box("#667eea", "#764ba2", "⚠️ Please enter a code comment to classify.")

    # The configured base URL may end with "/" (the default does); strip it so the
    # request path is "/predict" rather than "//predict".
    endpoint = API_URL.rstrip("/") + "/predict"

    try:
        # Call the API
        response = requests.post(
            endpoint,
            json={"texts": [text], "language": language.lower(), "model_type": "catboost"},
            timeout=30,
        )

        if response.status_code != 200:
            return _message_box(
                "#f44336",
                "#d32f2f",
                f"❌ API Error: {response.status_code}<br>"
                f'<small style="font-weight: normal;">{html.escape(response.text)}</small>',
            )

        results = response.json().get("data", {}).get("results", [])
        if not results:
            return _message_box("#9c27b0", "#7b1fa2", "🔍 No prediction results available")

        # Only one text is sent, so only the first result is relevant.
        labels = results[0].get("labels", [])
        if not labels:
            return _message_box("#ff9800", "#f57c00", "📊 No labels predicted (below threshold)")

        labels_html = "".join(
            f'<span style="background: #4CAF50; color: white; padding: 4px 8px; '
            f'margin: 2px; border-radius: 12px; font-size: 14px; display: inline-block;">'
            f"🏷️ {html.escape(str(label))}</span>"
            for label in labels
        )
        return f"""
        <div style="background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%);
                    border-radius: 15px; padding: 20px; margin: 10px 0;
                    box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
            <h3 style="color: white; margin: 0 0 15px 0; text-align: center;">
                ✅ Prediction Successful
            </h3>
            <div style="text-align: center;">
                {labels_html}
            </div>
        </div>
        """

    except requests.exceptions.ConnectionError:
        return _message_box(
            "#607d8b",
            "#455a64",
            "🌐 Connection Error<br>"
            '<small style="font-weight: normal;">'
            f"Cannot connect to API at {html.escape(API_URL)}</small>",
        )
    except requests.exceptions.Timeout:
        return _message_box(
            "#ff5722",
            "#d84315",
            "⏱️ Timeout Error<br>"
            '<small style="font-weight: normal;">The request took too long</small>',
        )
    except Exception as e:
        # Deliberate catch-all at the UI boundary: malformed JSON, unexpected payload
        # shapes, etc. are shown to the user instead of crashing the handler.
        return _message_box(
            "#9e9e9e",
            "#757575",
            "⚠️ Unexpected Error<br>"
            f'<small style="font-weight: normal;">{html.escape(str(e))}</small>',
        )
165
+
166
+
167
def update_categories(language: str) -> str:
    """Produce the categories text for a newly selected language.

    Used as the change handler of the language selector; it simply delegates to
    ``get_categories_for_language``.

    Args:
        language: The selected programming language

    Returns:
        Markdown description of the categories supported for that language
    """
    categories_markdown = get_categories_for_language(language)
    return categories_markdown
177
+
178
+
179
# Create Gradio interface with dynamic categories.
# `gradio_app` is the module-level Blocks object launched at the bottom of this file.
with gr.Blocks(title="🔍 Nygaard Code Comment Classifier") as gradio_app:
    gr.Markdown("# 🔍 Nygaard Code Comment Classifier")
    gr.Markdown("Classify code comments into multiple categories using machine learning.")

    # Language selector (defaults to Python)
    language_dropdown = gr.Dropdown(
        choices=["Java", "Python", "Pharo"], label="Programming Language", value="Python"
    )

    # Dynamic categories display, initialised for the default language
    categories_display = gr.Markdown(value=get_categories_for_language("Python"))

    # Connect language change to categories update
    language_dropdown.change(
        fn=update_categories, inputs=language_dropdown, outputs=categories_display
    )

    # Input components
    with gr.Row():
        text_input = gr.Textbox(
            label="Code Comment", placeholder="Enter your code comment here...", lines=5
        )

    # Output: predict_gradio returns an HTML string, hence an HTML component
    output_display = gr.HTML(label="Prediction Result")

    # Buttons
    with gr.Row():
        clear_btn = gr.Button("Clear", variant="secondary")
        submit_btn = gr.Button("Submit", variant="primary")

    # Sample (comment, language) pairs wired to the comment box and language selector
    gr.Examples(
        examples=[
            ["@deprecated Use newMethod() instead", "Java"],
            ["This method calculates the factorial of a number", "Python"],
            ["Returns the sum of all elements in the collection", "Pharo"],
        ],
        inputs=[text_input, language_dropdown],
    )

    # Connect clear button to reset inputs: empties the comment box and sets the
    # language back to Python. The result panel is not among the outputs.
    clear_btn.click(fn=lambda: ("", "Python"), inputs=[], outputs=[text_input, language_dropdown])

    # Connect submit button to prediction function
    submit_btn.click(
        fn=predict_gradio, inputs=[text_input, language_dropdown], outputs=output_display
    )


# Allow running this module directly; listens on all interfaces on port 7860.
if __name__ == "__main__":
    gradio_app.launch(server_name="0.0.0.0", server_port=7860)
pyproject.toml ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["flit_core >=3.2,<4"]
3
+ build-backend = "flit_core.buildapi"
4
+
5
+ [project]
6
+ name = "nygaardcodecommentclassification"
7
+ version = "0.0.1"
8
+ description = "This task is about Code Comment Classification for 3 different programming languages. This task is inspired by a challenge and will be used for the SEAES course"
9
+ authors = [
10
+ { name = "Your name (or your organization/company/team)" },
11
+ ]
12
+
13
+ classifiers = [
14
+ "Programming Language :: Python :: 3",
15
+
16
+ ]
17
+ dependencies = []
18
+ requires-python = "~=3.12.0"
19
+
20
+ [project.optional-dependencies]
21
+ api = [
22
+ "catboost>=1.2.8",
23
+ "dagshub>=0.6.3",
24
+ "fastapi>=0.115.0",
25
+ "joblib>=1.4.0",
26
+ "loguru",
27
+ "mlflow==2.22.0",
28
+ "numpy>=1.26.0",
29
+ "python-multipart>=0.0.20",
30
+ "setfit>=1.1.3",
31
+ "torch>=2.7.0",
32
+ "uvicorn>=0.32.0",
33
+ "python-dotenv",
34
+ ]
35
+
36
+ dev = [
37
+ "mkdocs",
38
+ "pre-commit>=4.3.0",
39
+ "pytest",
40
+ "ruff",
41
+ ]
42
+
43
+ training = [
44
+ "datasets==4.3.0",
45
+ "deepchecks[nlp]>=0.19.1",
46
+ "optuna>=4.5.0",
47
+ "tqdm",
48
+ "typer",
49
+ ]
50
+
51
+ frontend = [
52
+ "gradio>=5.0.0",
53
+ "requests",
54
+ ]
55
+
56
+ all = [
57
+ "nygaardcodecommentclassification[api,dev,training,frontend]",
58
+ ]
59
+
60
+ [tool.uv]
61
+ override-dependencies = ["pyarrow==21.0.0"]
62
+
63
+ [tool.ruff]
64
+ line-length = 99
65
+ src = ["nygaardcodecommentclassification"]
66
+ include = ["pyproject.toml", "nygaardcodecommentclassification/**/*.py", "tests/**/*.py"]
67
+
68
+ [tool.ruff.lint]
69
+ extend-select = ["I"] # Add import sorting
70
+ # Enable all `pydocstyle` rules, limiting to those that adhere to the
71
+ # Google convention via `convention = "google"`, below.
72
+ select = ["D"]
73
+
74
+ [tool.ruff.lint.isort]
75
+ known-first-party = ["nygaardcodecommentclassification"]
76
+ force-sort-within-sections = true
77
+
78
+ [tool.ruff.lint.pydocstyle]
79
+ convention = "google"
80
+
81
+ [tool.uv.sources]
82
+ torch = [
83
+ { index = "pytorch-gpu" },
84
+ ]
85
+
86
+ [[tool.uv.index]]
87
+ name = "pytorch-gpu"
88
+ url = "https://download.pytorch.org/whl/cu130"
89
+ explicit = true