Mihai Băluță-Cujbă committed on
Commit
bc46be8
·
1 Parent(s): 1b6fb30

Enhance app.py with detailed docstrings, emoji support, and improved UI layout for code review classification

Browse files
Files changed (1) hide show
  1. app.py +154 -37
app.py CHANGED
@@ -1,3 +1,13 @@
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import re
3
  from functools import lru_cache
@@ -29,8 +39,24 @@ GITHUB_REVIEW_URL = re.compile(
29
  MAX_COMMENT_LENGTH = 4000
30
  REQUEST_TIMEOUT_SECONDS = 10
31
  APP_USER_AGENT = "CodeReviewQualityAnalyzer/0.1"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  def _extract_comment_id(fragment: str) -> Tuple[str, str]:
 
34
  if not fragment:
35
  raise ValueError("URL must include a fragment pointing to a specific comment.")
36
 
@@ -51,6 +77,7 @@ def _extract_comment_id(fragment: str) -> Tuple[str, str]:
51
  )
52
 
53
  def _github_headers() -> Dict[str, str]:
 
54
  headers = {
55
  "Accept": "application/vnd.github+json",
56
  "User-Agent": APP_USER_AGENT,
@@ -62,6 +89,13 @@ def _github_headers() -> Dict[str, str]:
62
 
63
 
64
  def fetch_comment_from_github(url: str) -> str:
 
 
 
 
 
 
 
65
  match = GITHUB_REVIEW_URL.match(url.strip())
66
  if not match:
67
  raise ValueError("Only GitHub pull request comment URLs are supported at the moment.")
@@ -107,15 +141,31 @@ def fetch_comment_from_github(url: str) -> str:
107
 
108
  @lru_cache(maxsize=1)
109
  def get_zero_shot_pipeline():
110
- return pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=-1)
 
111
 
112
  def build_table(labels: List[str], scores: List[float]) -> List[List[str]]:
 
113
  rows: List[List[str]] = []
114
  for label, score in zip(labels, scores):
115
  rows.append([label, f"{score:.2%}"])
116
  return rows
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  def classify_comment(comment: str) -> Dict[str, object]:
 
119
  classifier = get_zero_shot_pipeline()
120
 
121
  type_result = classifier(comment, TYPE_LABELS, multi_label=False)
@@ -130,10 +180,7 @@ def classify_comment(comment: str) -> Dict[str, object]:
130
  type_table = build_table(type_result["labels"], type_result["scores"])
131
  sentiment_table = build_table(sentiment_result["labels"], sentiment_result["scores"])
132
 
133
- summary = (
134
- f"**Feedback Type:** {best_type} ({best_type_score:.1%} confidence)\n"
135
- f"**Sentiment:** {best_sentiment} ({best_sentiment_score:.1%} confidence)\n"
136
- )
137
 
138
  return {
139
  "summary": summary,
@@ -142,6 +189,13 @@ def classify_comment(comment: str) -> Dict[str, object]:
142
  }
143
 
144
  def analyze_comment(comment_text: str, review_url: str):
 
 
 
 
 
 
 
145
  comment_text = (comment_text or "").strip()
146
  review_url = (review_url or "").strip()
147
 
@@ -194,48 +248,111 @@ def analyze_comment(comment_text: str, review_url: str):
194
  fetched_preview,
195
  )
196
 
197
- with gr.Blocks(title="Code Review Quality Analyzer") as demo:
 
 
 
 
 
 
 
198
  gr.Markdown(
199
  "# Code Review Quality Analyzer\n"
200
- "Paste a code review comment or provide a GitHub review URL to classify the feedback type and sentiment.\n"
201
- "This demo uses the open-source zero-shot classifier `facebook/bart-large-mnli` so it runs on CPU-only Spaces."
 
202
  )
203
 
204
- with gr.Row():
205
- comment_input = gr.Textbox(
206
- label="Review Comment Text",
207
- placeholder="Paste a single review comment...",
208
- lines=6,
209
- )
210
- url_input = gr.Textbox(
211
- label="GitHub Review URL",
212
- placeholder="https://github.com/org/repo/pull/123#discussion_r456",
213
- lines=2,
214
- )
215
-
216
- analyze_button = gr.Button("Analyze Review")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
 
218
- summary_output = gr.Markdown(label="Classification Summary")
219
- type_output = gr.Dataframe(
220
- headers=["Label", "Confidence"],
221
- label="Feedback Type Confidence",
222
- datatype=["str", "str"],
223
- interactive=False,
224
- )
225
- sentiment_output = gr.Dataframe(
226
- headers=["Label", "Confidence"],
227
- label="Sentiment Confidence",
228
- datatype=["str", "str"],
229
- interactive=False,
230
- )
231
- preview_output = gr.Textbox(label="Analyzed Comment", lines=6)
232
- fetched_preview_output = gr.Textbox(label="Fetched GitHub Comment", lines=6)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
  analyze_button.click(
235
  analyze_comment,
236
  inputs=[comment_input, url_input],
237
  outputs=[summary_output, type_output, sentiment_output, preview_output, fetched_preview_output],
238
  )
 
 
 
 
 
239
 
240
  if __name__ == "__main__":
241
- demo.launch()
 
1
+ """Code Review Quality Analyzer (Gradio / HF Spaces)
2
+
3
+ This app classifies a single code review comment by:
4
+ - Feedback Type: Logic/Bug, Suggestion, Style/Nitpick, Question, Praise
5
+ - Sentiment: Positive, Neutral, Negative
6
+
7
+ It uses a zero-shot classifier (`facebook/bart-large-mnli`) so it runs on CPU.
8
+ You can paste comment text directly, or fetch from a GitHub PR comment URL.
9
+ """
10
+
11
  import os
12
  import re
13
  from functools import lru_cache
 
39
  MAX_COMMENT_LENGTH = 4000
40
  REQUEST_TIMEOUT_SECONDS = 10
41
  APP_USER_AGENT = "CodeReviewQualityAnalyzer/0.1"
# Hugging Face model id used for zero-shot classification.
PIPELINE_MODEL_ID: str = "facebook/bart-large-mnli"

# Emoji shown next to each feedback-type label so results scan quickly.
TYPE_EMOJI: Dict[str, str] = {
    "Logic/Bug": "🐞",
    "Suggestion": "💡",
    "Style/Nitpick": "✏️",
    "Question": "❓",
    "Praise": "🙌",
}

# Emoji shown next to each sentiment label.
SENTIMENT_EMOJI: Dict[str, str] = {
    "Positive": "🙂",
    "Neutral": "😐",
    "Negative": "🙁",
}
57
 
58
  def _extract_comment_id(fragment: str) -> Tuple[str, str]:
59
+ """Parse the fragment from a PR URL and extract the comment type and id."""
60
  if not fragment:
61
  raise ValueError("URL must include a fragment pointing to a specific comment.")
62
 
 
77
  )
78
 
79
  def _github_headers() -> Dict[str, str]:
80
+ """Build GitHub headers, optionally adding a bearer token to increase limits."""
81
  headers = {
82
  "Accept": "application/vnd.github+json",
83
  "User-Agent": APP_USER_AGENT,
 
89
 
90
 
91
  def fetch_comment_from_github(url: str) -> str:
92
+ """Fetch a PR review comment body from a public GitHub URL.
93
+
94
+ Supported fragments:
95
+ - #discussion_r<ID>
96
+ - #issuecomment-<ID>
97
+ - #pullrequestreview-<ID>
98
+ """
99
  match = GITHUB_REVIEW_URL.match(url.strip())
100
  if not match:
101
  raise ValueError("Only GitHub pull request comment URLs are supported at the moment.")
 
141
 
@lru_cache(maxsize=1)
def get_zero_shot_pipeline():
    """Return the shared zero-shot classification pipeline.

    The pipeline is created on the first call; the single-slot ``lru_cache``
    hands the same object back on every later call, so the model is only
    loaded once per process. ``device=-1`` requests CPU execution.
    """
    classifier = pipeline(
        "zero-shot-classification",
        model=PIPELINE_MODEL_ID,
        device=-1,
    )
    return classifier
146
 
def build_table(labels: List[str], scores: List[float]) -> List[List[str]]:
    """Convert parallel label and score lists into rows for tabular display.

    Args:
        labels: Class labels, in display order.
        scores: Scores aligned index-by-index with ``labels``.

    Returns:
        One ``[label, score]`` row per pair, with the score rendered as a
        percentage with two decimals (``0.5`` becomes ``"50.00%"``). Pairing
        uses ``zip``, so surplus items in the longer list are ignored.
    """
    return [[label, f"{score:.2%}"] for label, score in zip(labels, scores)]
153
 
def _format_summary(best_type: str, best_type_score: float, best_sentiment: str, best_sentiment_score: float) -> str:
    """Build the emoji-enhanced Markdown summary for the top predictions.

    Args:
        best_type: Highest-scoring feedback-type label.
        best_type_score: Score of ``best_type``, rendered as a percentage.
        best_sentiment: Highest-scoring sentiment label.
        best_sentiment_score: Score of ``best_sentiment``, rendered as a percentage.

    Returns:
        A Markdown string with a heading, one bullet per prediction, and a
        footer line naming the model.
    """
    type_emoji = TYPE_EMOJI.get(best_type, "")
    sent_emoji = SENTIMENT_EMOJI.get(best_sentiment, "")
    # Only prepend the emoji (and its separating space) when one is mapped,
    # so unknown labels do not render with a stray double space.
    type_text = f"{type_emoji} {best_type}" if type_emoji else best_type
    sentiment_text = f"{sent_emoji} {best_sentiment}" if sent_emoji else best_sentiment
    return (
        "### Result\n"
        f"- Feedback Type: {type_text} ({best_type_score:.1%})\n"
        f"- Sentiment: {sentiment_text} ({best_sentiment_score:.1%})\n"
        "\n"
        f"Model: `{PIPELINE_MODEL_ID}` · Device: CPU · Method: zero-shot\n"
    )
165
+
166
+
167
  def classify_comment(comment: str) -> Dict[str, object]:
168
+ """Run zero-shot classification for feedback type and sentiment."""
169
  classifier = get_zero_shot_pipeline()
170
 
171
  type_result = classifier(comment, TYPE_LABELS, multi_label=False)
 
180
  type_table = build_table(type_result["labels"], type_result["scores"])
181
  sentiment_table = build_table(sentiment_result["labels"], sentiment_result["scores"])
182
 
183
+ summary = _format_summary(best_type, best_type_score, best_sentiment, best_sentiment_score)
 
 
 
184
 
185
  return {
186
  "summary": summary,
 
189
  }
190
 
191
  def analyze_comment(comment_text: str, review_url: str):
192
+ """Main handler called from the UI.
193
+
194
+ Rules:
195
+ - If both fields are provided, prefer the pasted text (URL is fetched for preview only).
196
+ - If only URL is provided, attempt to fetch the comment body.
197
+ - Validate size and emit structured outputs.
198
+ """
199
  comment_text = (comment_text or "").strip()
200
  review_url = (review_url or "").strip()
201
 
 
248
  fetched_preview,
249
  )
250
 
251
+ def _clear():
252
+ """Reset inputs and outputs to a clean state."""
253
+ return "", "", "", [], [], "", ""
254
+
255
+
theme = gr.themes.Soft(primary_hue="indigo", neutral_hue="slate")

# Sample review comments for the Examples panel; each is paired with an empty URL.
_EXAMPLE_COMMENTS = [
    "This will break when `user` is None. Consider checking for None before calling `get_id()`.",
    "Nice cleanup here — this reads much better now. Thanks!",
    "Nit: rename `x` to something more descriptive like `retry_interval`.",
    "Why do we need this extra flag? Doesn't the existing `bar` already handle that case?",
    "Consider extracting this logic into a helper function to avoid duplication across handlers.",
    "This is a risky approach; I recommend reverting and discussing alternatives.",
]

with gr.Blocks(title="Code Review Quality Analyzer", theme=theme) as demo:
    gr.Markdown(
        "# Code Review Quality Analyzer\n"
        "Classify a code review comment by feedback type and sentiment.\n\n"
        "- Runs on CPU (no GPU needed) using zero-shot classification.\n"
        f"- Model: `{PIPELINE_MODEL_ID}` · Categories are configurable."
    )

    with gr.Row(equal_height=True):
        # Left column: inputs, examples, and action buttons.
        with gr.Column(scale=1):
            with gr.Tabs():
                with gr.Tab("Paste Comment"):
                    comment_input = gr.Textbox(
                        label="Review Comment Text",
                        placeholder="Paste a single review comment...",
                        lines=8,
                        autofocus=True,
                    )
                with gr.Tab("GitHub URL"):
                    url_input = gr.Textbox(
                        label="Public GitHub PR Comment URL",
                        placeholder="https://github.com/org/repo/pull/123#discussion_r456",
                        lines=2,
                        # Lists the same fragment kinds documented on fetch_comment_from_github.
                        info=(
                            "Works for #discussion_r<ID>, #issuecomment-<ID>, "
                            "and #pullrequestreview-<ID> on public repos."
                        ),
                    )

            gr.Markdown("### Examples")
            gr.Examples(
                examples=[[text, ""] for text in _EXAMPLE_COMMENTS],
                inputs=[comment_input, url_input],
                run_on_click=False,
            )

            with gr.Row():
                analyze_button = gr.Button("Analyze Review", variant="primary")
                clear_button = gr.Button("Clear")

        # Right column: classification results and previews.
        with gr.Column(scale=1):
            summary_output = gr.Markdown(label="Classification Summary")
            with gr.Row():
                type_output = gr.Dataframe(
                    headers=["Label", "Confidence"],
                    label="Feedback Type Confidence",
                    datatype=["str", "str"],
                    interactive=False,
                )
                sentiment_output = gr.Dataframe(
                    headers=["Label", "Confidence"],
                    label="Sentiment Confidence",
                    datatype=["str", "str"],
                    interactive=False,
                )
            with gr.Accordion("Preview", open=False):
                preview_output = gr.Textbox(label="Analyzed Comment", lines=6)
                fetched_preview_output = gr.Textbox(label="Fetched GitHub Comment", lines=6)

    with gr.Accordion("Tips", open=False):
        gr.Markdown(
            "- Use concise, single-comment inputs for best results.\n"
            "- For organization-wide insights, aggregate predictions across many comments.\n"
            "- Replace the zero-shot model with a fine-tuned one for higher accuracy on your data."
        )

    # Event wiring: the output order must match what each handler returns.
    analyze_button.click(
        analyze_comment,
        inputs=[comment_input, url_input],
        outputs=[summary_output, type_output, sentiment_output, preview_output, fetched_preview_output],
    )
    clear_button.click(
        _clear,
        inputs=None,
        outputs=[
            comment_input,
            url_input,
            summary_output,
            type_output,
            sentiment_output,
            preview_output,
            fetched_preview_output,
        ],
    )
356
 
if __name__ == "__main__":
    # Enable the request queue (max_size=16) and then start the app.
    queued_app = demo.queue(max_size=16)
    queued_app.launch()