Update app.py

app.py CHANGED
@@ -45,6 +45,9 @@ class CodeGenius:
         # Initialize embedding model for semantic search
         self.init_embedding_model()
 
+        # Feature flags (env driven)
+        self.use_local_llm = os.getenv("USE_LOCAL_LLM", "0") == "1"
+
     def load_programming_data(self) -> Dict:
         """Load programming knowledge from JSON file"""
         try:
@@ -198,12 +201,13 @@ class CodeGenius:
         """Load AI model for advanced queries"""
         if self.model_loaded:
            return True
-
-        if TRANSFORMERS_AVAILABLE:
+
+        # Only attempt heavy model if explicitly enabled
+        if TRANSFORMERS_AVAILABLE and self.use_local_llm:
             try:
                 # Use a code-specific model
                 model_name = "bigcode/starcoder2-7b"
-
+
                 self.tokenizer = AutoTokenizer.from_pretrained(model_name)
                 self.model = AutoModelForCausalLM.from_pretrained(
                     model_name,
@@ -211,11 +215,11 @@ class CodeGenius:
                     device_map="auto" if torch.cuda.is_available() else None,
                     low_cpu_mem_usage=True
                 )
-
+
                 # Add pad token if not present
                 if self.tokenizer.pad_token is None:
                     self.tokenizer.pad_token = self.tokenizer.eos_token
-
+
                 self.generator = pipeline(
                     "text-generation",
                     model=self.model,
@@ -223,16 +227,17 @@ class CodeGenius:
                     device=0 if torch.cuda.is_available() else -1,
                     return_full_text=False
                 )
-
+
                 self.model_loaded = True
                 print("✅ AI model loaded successfully!")
                 return True
-
+
             except Exception as e:
                 print(f"⚠️ Could not load AI model: {str(e)}")
                 return False
         else:
-
+            if not TRANSFORMERS_AVAILABLE and self.use_local_llm:
+                print("🔧 Install transformers and torch for AI features")
             return False
 
     def generate_ai_response(self, query: str, context: str = "", code: str = "") -> str:
@@ -312,14 +317,161 @@ Provide the best solution with explanation and consider edge cases."""
         """Generate detailed error explanation and solution"""
         common_errors = lang_data.get('common_errors', [])
 
+        bullets = ", ".join([e.get('name', 'Unknown') for e in common_errors[:5]]) or "syntax and runtime issues"
+        steps = [
+            "Reproduce the error and capture the full traceback/message",
+            "Locate the failing line and inspect variables/inputs",
+            "Minimize to a small reproducible example",
+            "Apply a fix, then add/adjust a test to prevent regressions",
+        ]
+        suggestions = [f"{e.get('name', 'Error')}: {e.get('solution', '')}" for e in common_errors[:5]]
         response = (
-            f"
-            "
-            f"
-            f"
-            "
-
-
-
-
-        )
+            f"Debugging {lang}:\n"
+            f"Common issues: {bullets}.\n\n"
+            f"Code (context):\n{(code or '# no code provided').strip()}\n\n"
+            f"Steps:\n- " + "\n- ".join(steps) + "\n\n"
+            + ("Hints:\n- " + "\n- ".join(suggestions) if suggestions else "")
+        )
+        return response
+
+    def generate_optimization_response(self, lang: str, lang_data: dict, code: str) -> str:
+        tips = lang_data.get('optimization', [])
+        generic = [
+            "Profile first; optimize hot paths, not guesses",
+            "Prefer algorithms/data structures with better complexity",
+            "Avoid unnecessary allocations and copies",
+            "Cache expensive results where safe",
+        ]
+        body = (
+            f"Performance tips for {lang}:\n- " + "\n- ".join(tips + generic[: max(0, 4 - len(tips))]) +
+            (f"\n\nCode (context):\n{code.strip()}" if code else "")
+        )
+        return body
+
+    def generate_explanation_response(self, lang: str, lang_data: dict, code: str) -> str:
+        if not code:
+            return (
+                f"Explain {lang} code: provide the snippet for a targeted walkthrough.\n"
+                f"Meanwhile, key {lang} concepts: paradigms={', '.join(lang_data.get('paradigm', []))}, typing={lang_data.get('typing', 'n/a')}."
+            )
+        outline = [
+            "High-level: What does this code do?",
+            "Inputs/outputs: parameters, return values, side effects",
+            "Control flow: loops, branches, error handling",
+            "Data structures and complexity",
+        ]
+        return (
+            f"Explanation ({lang}):\n"
+            f"Code:\n{code.strip()}\n\n"
+            f"Consider:\n- " + "\n- ".join(outline)
+        )
+
+    def generate_code_response(self, lang: str, lang_data: dict, query: str) -> str:
+        # Provide a minimal idiomatic template per language
+        templates = {
+            'python': (
+                "# minimal CLI template\n"
+                "import sys\n\n"
+                "def main(argv: list[str]) -> int:\n"
+                "    # TODO: implement\n"
+                "    print('Hello from CodeGenius')\n"
+                "    return 0\n\n"
+                "if __name__ == '__main__':\n"
+                "    raise SystemExit(main(sys.argv[1:]))\n"
+            ),
+            'javascript': (
+                "// minimal Node.js module template\n"
+                "export function main(args = []) {\n"
+                "  console.log('Hello from CodeGenius');\n"
+                "}\n"
+            ),
+            'java': (
+                "// minimal Java app template\n"
+                "public class App {\n"
+                "    public static void main(String[] args) {\n"
+                "        System.out.println(\"Hello from CodeGenius\");\n"
+                "    }\n"
+                "}\n"
+            )
+        }
+        key = lang.lower()
+        snippet = templates.get(key, "// Provide more detail to generate specific code.")
+        return f"Generated starter for {lang}:\n{snippet}"
+
+    def generate_general_lang_response(self, lang: str, lang_data: dict, query: str) -> str:
+        paradigms = ', '.join(lang_data.get('paradigm', []))
+        use_cases = ', '.join(lang_data.get('use_cases', []))
+        typing = lang_data.get('typing', 'n/a')
+        pitfalls = ', '.join([e.get('name', '') for e in lang_data.get('common_errors', [])[:5]])
+        return (
+            f"{lang.capitalize()} overview: paradigms={paradigms}; typing={typing}; typical uses={use_cases}.\n"
+            f"Watch for: {pitfalls}.\n"
+            f"Query: {query}"
+        )
+
+    def generate_general_programming_response(self, query: str, context: str, code: str) -> str:
+        parts = []
+        if context:
+            parts.append(f"Relevant knowledge: {context}")
+        if code:
+            parts.append(f"Code context:\n{code.strip()}")
+        parts.append(
+            "Approach: clarify requirements, choose data structures, write small tests, implement incrementally, and profile if performance matters."
+        )
+        return f"Answering: {query}\n" + "\n\n".join(parts)
+
+    def answer(self, query: str, code: str = "") -> str:
+        """Top-level entry: perform semantic search, then answer."""
+        # Build context from semantic search
+        top = self.semantic_search(query, top_k=3)
+        context_str = " | ".join([t['item']['content'] for t in top]) if top else ""
+        # Use template or local LLM if enabled
+        return self.generate_ai_response(query, context_str, code)
+
+
+# -------- Simple UI / Entrypoint --------
+def _build_gradio_ui(genius: CodeGenius):
+    with gr.Blocks(title="CodeGenius") as demo:
+        gr.Markdown("# CodeGenius\nAn AI-powered programming helper (lightweight mode by default).")
+        chatbot = gr.Chatbot(height=350)
+        with gr.Row():
+            msg = gr.Textbox(label="Ask a question", scale=3)
+            code_in = gr.Textbox(label="Optional code context", lines=8)
+        clear = gr.Button("Clear")
+
+        state = gr.State([])
+
+        def respond(user_message, chat_history, code_text):
+            if not user_message:
+                return chat_history or [], chat_history or []
+            reply = genius.answer(user_message, code_text or "")
+            chat_history = (chat_history or []) + [[user_message, reply]]
+            return chat_history, chat_history
+
+        msg.submit(respond, [msg, state, code_in], [chatbot, state])
+        clear.click(lambda: ([], []), None, [chatbot, state], queue=False)
+    return demo
+
+
+def main():
+    genius = CodeGenius()
+    if os.getenv("RUN_UI", "0") == "1":
+        demo = _build_gradio_ui(genius)
+        demo.launch(server_name="127.0.0.1", server_port=int(os.getenv("PORT", "7860")))
+        return
+    # CLI mode
+    print("CodeGenius (CLI). Type 'exit' to quit.")
+    while True:
+        try:
+            q = input("You> ").strip()
+        except (EOFError, KeyboardInterrupt):
+            print()
+            break
+        if q.lower() in {"exit", "quit"}:
+            break
+        ans = genius.answer(q)
+        print(f"Bot> {ans}\n")
+
+
+if __name__ == "__main__":
+    main()