Spaces:
Sleeping
Sleeping
Sushwetabm
committed on
Commit
·
9a2b71a
1
Parent(s):
aff0b1f
updated analyzer.py
Browse files- analyzer.py +23 -20
analyzer.py
CHANGED
|
@@ -192,6 +192,7 @@
|
|
| 192 |
# "error_type": type(e).__name__
|
| 193 |
# }
|
| 194 |
|
|
|
|
| 195 |
# analyzer.py
|
| 196 |
|
| 197 |
import torch
|
|
@@ -207,42 +208,44 @@ formatter = logging.Formatter("[%(asctime)s] [%(levelname)s] - %(message)s")
|
|
| 207 |
handler.setFormatter(formatter)
|
| 208 |
logger.addHandler(handler)
|
| 209 |
|
| 210 |
-
|
| 211 |
def analyze_code(tokenizer, model, language, code):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
start_time = time.time()
|
| 213 |
|
| 214 |
-
prompt
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
f"{{\n"
|
| 220 |
-
f" \"bug_analysis\": [{{\"line_number\": X, \"error_message\": \"...\", \"explanation\": \"...\", \"fix_suggestion\": \"...\"}}],\n"
|
| 221 |
-
f" \"corrected_code\": \"...\"\n"
|
| 222 |
-
f"}}"
|
| 223 |
-
)
|
| 224 |
|
| 225 |
try:
|
|
|
|
| 226 |
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)
|
| 227 |
-
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
json_output = json.loads(cleaned)
|
| 233 |
|
| 234 |
return {
|
| 235 |
-
"bug_analysis":
|
| 236 |
-
"corrected_code":
|
| 237 |
}
|
| 238 |
|
| 239 |
except Exception as e:
|
|
|
|
| 240 |
return {
|
| 241 |
"bug_analysis": [{
|
| 242 |
"line_number": 0,
|
| 243 |
-
"error_message": "
|
| 244 |
"explanation": str(e),
|
| 245 |
-
"fix_suggestion": "Try
|
| 246 |
}],
|
| 247 |
"corrected_code": code
|
| 248 |
}
|
|
|
|
| 192 |
# "error_type": type(e).__name__
|
| 193 |
# }
|
| 194 |
|
| 195 |
+
# analyzer.py
|
| 196 |
# analyzer.py
|
| 197 |
|
| 198 |
import torch
|
|
|
|
| 208 |
handler.setFormatter(formatter)
|
| 209 |
logger.addHandler(handler)
|
| 210 |
|
|
|
|
| 211 |
def analyze_code(tokenizer, model, language, code):
    """
    Analyze and fix buggy code using CodeT5+ model with 'fix:' prompt prefix.
    Works across multiple programming languages.

    Args:
        tokenizer: HF tokenizer paired with *model*; also used to decode output.
        model: seq2seq model exposing `.device` and `.generate` (CodeT5+ style).
        language: name of the source language (used only for logging here).
        code: the buggy source code to repair.

    Returns:
        dict with "bug_analysis" (list of finding dicts) and "corrected_code"
        (model output on success, the unmodified input on failure).
    """
    start_time = time.time()

    # Prepare prompt in CodeT5+ style
    prompt = f"fix: {code.strip()}"

    logger.info(f"🔍 Starting analysis for language: {language}")
    logger.info(f"🧾 Prompt: {prompt[:80]}...")

    try:
        # Tokenize and generate response
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)
        with torch.no_grad():
            output = model.generate(**inputs, max_new_tokens=1024)

        # Decode output
        response = tokenizer.decode(output[0], skip_special_tokens=True).strip()

        elapsed = round(time.time() - start_time, 2)
        logger.info(f"✅ Inference completed in {elapsed}s")

        return {
            "bug_analysis": [],  # Optional: You could add heuristics here
            "corrected_code": response
        }

    except Exception as e:
        # Boundary handler: never raise to the caller; report the failure
        # inside the same result shape and echo the original code back.
        logger.error(f"❌ Error during analysis: {e}")
        return {
            "bug_analysis": [{
                "line_number": 0,
                "error_message": "Inference failed",
                "explanation": str(e),
                "fix_suggestion": "Try again with simpler code or retry later"
            }],
            "corrected_code": code
        }
|