import gradio as gr
import os
import torch
import numpy as np
import random
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM, TextIteratorStreamer
from scipy.special import softmax
import logging
import spaces
from threading import Thread
from collections.abc import Iterator
import csv
# Setup logging first: emitting any record through the root logger before
# basicConfig() would install a default handler and make this call a no-op.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
# Login to Hugging Face with the token stored in the `hf_token` environment
# variable.
token = os.getenv("hf_token")
if token:
    login(token=token)
else:
    # login() without a token falls back to an interactive prompt, which
    # cannot be answered in a headless deployment, so skip it and say why.
    logging.warning("Environment variable 'hf_token' is not set; skipping Hugging Face login.")
# Increase CSV field size limit
csv.field_size_limit(1000000)
# Seed every random number generator used by the app so that repeated runs
# draw the same samples.
seed = 42
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
# CUDA generators are seeded only when a GPU is actually visible.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)
# Hub repository ids of the binary classifiers, one per quality attribute.
# The order of this list is the order in which the classifiers are loaded.
model_paths = [
    "karths/binary_classification_train_port",
    "karths/binary_classification_train_perf",
    "karths/binary_classification_train_main",
    "karths/binary_classification_train_secu",
    "karths/binary_classification_train_reli",
    "karths/binary_classification_train_usab",
    "karths/binary_classification_train_comp",
]
# Maps the last path segment of a classifier repository id to the
# human-readable quality attribute shown in the UI.
quality_mapping = {
    "binary_classification_train_port": "Portability",
    "binary_classification_train_main": "Maintainability",
    "binary_classification_train_secu": "Security",
    "binary_classification_train_reli": "Reliability",
    "binary_classification_train_usab": "Usability",
    "binary_classification_train_perf": "Performance",
    "binary_classification_train_comp": "Compatibility",
}
# Load the shared tokenizer and one sequence-classification model per entry
# in `model_paths` once, at import time, so requests reuse them.
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilroberta-base")
models = {
    model_path: AutoModelForSequenceClassification.from_pretrained(model_path)
    for model_path in model_paths
}
def get_quality_name(model_name):
    """Translate a classifier repository id into its display name.

    Only the text after the final ``/`` of ``model_name`` is looked up in
    ``quality_mapping``; ids without an entry yield ``"Unknown Quality"``.
    """
    repo_basename = model_name.rsplit('/', 1)[-1]
    return quality_mapping.get(repo_basename, "Unknown Quality")
def model_prediction(model, text, device):
    """Return the mean class-1 probability that ``model`` assigns to ``text``.

    The model is moved to ``device`` for the forward pass and is always moved
    back to the CPU afterwards, even when tokenization or inference raises,
    so a failed request does not leave the model on the accelerator.

    Args:
        model: Classifier called as ``model(**inputs)``; its ``.logits`` are
            read.
        text: Input handed to the module-level ``tokenizer`` (truncated to
            512 tokens).
        device: Device on which to run the forward pass.

    Returns:
        The mean, over the rows of the softmaxed logits, of column 1.
        (Presumably the "belongs to this quality" class; confirm against the
        classifiers' label mapping.)
    """
    try:
        model.to(device)
        model.eval()
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = model(**inputs)
        probs = softmax(outputs.logits.cpu().numpy(), axis=1)
        return np.mean(probs[:, 1])
    finally:
        # Runs on success and on failure alike.
        model.to("cpu")
# --- Llama 3.2 1B Instruct Model Setup ---
# (The checkpoint loaded below is the 1B Instruct variant; see llama_model_id.)
# NOTE(review): LLAMA_MAX_MAX_NEW_TOKENS is not referenced by the visible
# code; presumably meant as an upper bound on max_new_tokens. Confirm.
LLAMA_MAX_MAX_NEW_TOKENS = 512
# Default value of llama_generate's `max_new_tokens` parameter.
LLAMA_DEFAULT_MAX_NEW_TOKENS = 250
# Prompt length limit in tokens; overridable through the
# MAX_INPUT_TOKEN_LENGTH environment variable (default 1024).
LLAMA_MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "1024"))
# NOTE(review): llama_device is not referenced by the visible code; model
# placement below is delegated to device_map="auto".
llama_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
llama_model_id = "meta-llama/Llama-3.2-1B-Instruct"
llama_tokenizer = AutoTokenizer.from_pretrained(llama_model_id)
llama_model = AutoModelForCausalLM.from_pretrained(
    llama_model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
llama_model.eval()
# Fall back to the EOS token as the padding token when the tokenizer does not
# define one, since the tokenizer is later called with padding=True.
if llama_tokenizer.pad_token is None:
    llama_tokenizer.pad_token = llama_tokenizer.eos_token
def llama_generate(
    message: str,
    max_new_tokens: int = LLAMA_DEFAULT_MAX_NEW_TOKENS,
    temperature: float = 0.2,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> str:
    """Sample a completion for ``message`` from the module-level Llama model.

    Prompts longer than ``LLAMA_MAX_INPUT_TOKEN_LENGTH`` tokens are truncated
    by the tokenizer, and a Gradio warning is raised so the user knows the
    input was shortened.

    Args:
        message: Prompt text, tokenized as-is.
        max_new_tokens: Forwarded to ``generate`` as ``max_new_tokens``.
        temperature: Sampling temperature.
        top_p: Nucleus sampling threshold.
        top_k: Top-k sampling cut-off.
        repetition_penalty: Repetition penalty passed to ``generate``.

    Returns:
        The decoded newly generated tokens only (prompt excluded, special
        tokens skipped), with surrounding whitespace stripped.
    """
    # Tokenize without truncation first: once the tokenizer has truncated to
    # max_length the result can never exceed it, so an over-long prompt could
    # not be detected (and the warning below would never fire).
    inputs = llama_tokenizer(message, return_tensors="pt", padding=True)
    if inputs.input_ids.shape[1] > LLAMA_MAX_INPUT_TOKEN_LENGTH:
        # Let the tokenizer do the truncation so input_ids and attention_mask
        # are shortened together.
        inputs = llama_tokenizer(
            message,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=LLAMA_MAX_INPUT_TOKEN_LENGTH,
        )
        gr.Warning(f"Trimmed input from conversation as it was longer than {LLAMA_MAX_INPUT_TOKEN_LENGTH} tokens.")
    inputs = inputs.to(llama_model.device)
    input_length = inputs.input_ids.shape[1]
    with torch.no_grad():
        generate_ids = llama_model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            top_p=top_p,
            top_k=top_k,
            temperature=temperature,
            num_beams=1,
            repetition_penalty=repetition_penalty,
            pad_token_id=llama_tokenizer.pad_token_id,
            eos_token_id=llama_tokenizer.eos_token_id,
        )
    # The returned sequence starts with the prompt; keep only what follows it.
    generated_tokens = generate_ids[0][input_length:]
    output_text = llama_tokenizer.decode(generated_tokens, skip_special_tokens=True)
    torch.cuda.empty_cache()
    return output_text.strip()
def generate_explanation(issue_text, top_quality):
    """Generate an explanation for the *single* top quality above threshold.

    Args:
        issue_text: The issue description entered by the user; embedded
            verbatim in the prompt.
        top_quality: Sequence whose first item is a ``(quality_name, ...)``
            pair. An empty or ``None`` value means no quality qualified.

    Returns:
        An HTML/Markdown string: the formatted explanation on success, or a
        red error ``<div>`` when there is no quality or generation fails.
    """
    if not top_quality:
        return "<div style='color: red;'>No explanation available as no quality tags met the threshold.</div>"
    quality_name = top_quality[0][0]  # Name of the top quality
    prompt = f"""Analyze the following issue description based on the quality dimension: {quality_name}.
Issue Description:
---
{issue_text}
---
1. **Justification**: Briefly explain why this issue fails or relates to {quality_name}.
2. **Improved Version**: Suggest how to rewrite the issue description to better meet this quality standard (e.g., making it more clear, concise, or actionable).
Provide your response directly without preamble. Use a clear separation between the justification and the rewrite."""
    try:
        explanation = llama_generate(prompt)
    except Exception as e:
        # UI boundary: never propagate to Gradio, but keep the full traceback
        # in the log so the failure can be diagnosed.
        logging.exception(f"Error during Llama generation: {e}")
        return "<div style='color: red;'>An error occurred while generating the explanation.</div>"
    # Blank lines around the Markdown let Gradio's Markdown component parse it
    # inside the HTML block.
    return f"<div class='explanation-box'>\n\n### Why this is a {quality_name} issue:\n\n{explanation}\n\n</div>"
@spaces.GPU(duration=60)
def main_interface(text):
    """Classify an issue description and explain the top-scoring quality.

    Every classifier in ``models`` scores ``text``; qualities scoring at
    least 0.95 are candidates and the highest-scoring one is rendered and
    explained.

    Args:
        text: Issue description from the UI. ``None``, blank, or shorter
            than 30 characters is rejected with an error message.

    Returns:
        A 3-tuple ``(prediction_html, "", explanation)`` matching the three
        output components of the interface; the middle value is always empty.
    """
    if not text or not text.strip():
        return "<div style='color: red;'>No text provided. Please enter a valid issue description.</div>", "", ""
    if len(text) < 30:
        return "<div style='color: red;'>Text is less than 30 characters.</div>", "", ""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    threshold = 0.95
    results = []
    for model_path, model in models.items():
        quality_name = get_quality_name(model_path)
        avg_prob = model_prediction(model, text, device)
        if avg_prob >= threshold:  # Keep *all* results above the threshold
            results.append((quality_name, avg_prob))
        logging.info("Model: %s, Quality: %s, Average Probability: %.3f", model_path, quality_name, avg_prob)
    if not results:
        return "<div style='color: red;'>No recommendation. Prediction probability is below the threshold.</div>", "", ""
    # `results` is non-empty here. max() returns the first of equally scored
    # entries, the same one a stable descending sort would put first.
    top_quality = [max(results, key=lambda item: item[1])]
    output_html = render_html_output(top_quality)
    explanation = generate_explanation(text, top_quality)
    return output_html, "", explanation
def render_html_output(top_qualities):
    """Build the HTML badge for the top prediction.

    ``top_qualities`` is a sequence of ``(quality, score)`` pairs of which
    only the first is rendered; an empty or ``None`` value produces a
    placeholder container. Styling comes from CSS classes, not inline styles.
    """
    if not top_qualities:
        return "<div class='quality-container'>No Top Prediction</div>"
    label, _score = top_qualities[0]  # Only the first pair is shown
    return f"""
<div class="quality-container">
<span class="ranking">Top Prediction</span>
<span class="quality-label">{label}</span>
</div>
"""
# Sample issue descriptions passed to gr.Interface as `examples`. Each inner
# list holds the value for the interface's single text input.
example_texts = [
    ["The algorithm does not accurately distinguish between the positive and negative classes during edge cases.\n\nEnvironment: Production\nReproduction: Run the classifier on the test dataset with known edge cases."],
    ["The regression tests do not cover scenarios involving concurrent user sessions.\n\nEnvironment: Test automation suite\nReproduction: Update the test scripts to include tests for concurrent sessions."],
    ["There is frequent miscommunication between the development and QA teams regarding feature specifications.\n\nEnvironment: Inter-team meetings\nReproduction: Audit recent communication logs and meeting notes between the teams."],
    ["The service-oriented architecture does not effectively isolate failures, leading to cascading failures across services.\n\nEnvironment: Microservices architecture\nReproduction: Simulate a service failure and observe the impact on other services."]
]
# Custom stylesheet passed to gr.Interface via `css=`. Colors use Gradio theme
# CSS variables with literal fallbacks, and the `.dark` rules at the end give
# explicit values for dark mode, so the layout works in BOTH light and dark
# modes.
css = """
.quality-container {
font-family: Arial, sans-serif;
text-align: center;
margin-top: 20px;
padding: 10px;
border: 1px solid var(--border-color-primary, #ddd);
border-radius: 8px;
background-color: var(--background-fill-secondary, #f9f9f9);
color: var(--body-text-color, #000);
}
.quality-label, .ranking {
display: inline-block;
padding: 0.5em 1em;
font-size: 18px;
font-weight: bold;
color: white; /* Always keep text white inside the blue badge */
background-color: #007bff;
border-radius: 0.5rem;
margin-right: 10px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
}
.explanation-box {
border: 1px solid var(--border-color-primary, #ccc);
padding: 15px;
margin-top: 15px;
border-radius: 8px;
background-color: var(--background-fill-primary, #fff);
color: var(--body-text-color, #000);
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
line-height: 1.5;
}
.explanation-box p {
margin: 8px 0;
}
.explanation-box b {
color: #007bff;
}
/* Fallback explicit styling for Gradio Dark Mode */
.dark .quality-container {
background-color: #1f2937;
border-color: #374151;
color: #f3f4f6;
}
.dark .explanation-box {
background-color: #1f2937;
border-color: #374151;
color: #f3f4f6;
box-shadow: 0 1px 3px rgba(255,255,255,0.05);
}
"""
# Gradio UI: one text input wired to main_interface, whose three return
# values feed the three outputs below in order. The middle Textbox is hidden
# and only receives an empty string.
interface = gr.Interface(
    fn=main_interface,
    inputs=gr.Textbox(lines=7, label="Issue Description", placeholder="Enter your issue text here"),
    outputs=[
        gr.HTML(label="Prediction Output"),
        gr.Textbox(label="Predictions", visible=False),
        gr.Markdown(label="Explanation")
    ],
    title="QualityTagger",
    description="This tool classifies text into different quality domains such as Security, Usability, Maintainability, Reliability etc., and provides explanations.",
    examples=example_texts,
    css=css,
    cache_examples=False
)
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()