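"""CodeGenius: a lightweight AI-powered programming assistant.

Combines a small JSON knowledge base, optional semantic search via
sentence-transformers, and an optional local code LLM, exposed through a
Gradio UI or a simple CLI.
"""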
import re
import random
import gradio as gr
import json
import os
from typing import Dict, List, Any
# Try to import AI libraries
try:
import openai
OPENAI_AVAILABLE = True
except ImportError:
OPENAI_AVAILABLE = False
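# The openai package is only probed for availability here; nothing in this file
# calls it directly.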
try:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
TRANSFORMERS_AVAILABLE = True
except ImportError:
TRANSFORMERS_AVAILABLE = False
# Try to import sentence transformers for semantic search
try:
from sentence_transformers import SentenceTransformer
import numpy as np
SENTENCE_TRANSFORMERS_AVAILABLE = True
except ImportError:
SENTENCE_TRANSFORMERS_AVAILABLE = False
class CodeGenius:
def __init__(self):
self.name = "CodeGenius"
self.user_name = ""
self.conversation_history = []
self.model_loaded = False
self.generator = None
self.tokenizer = None
self.model = None
self.embedding_model = None
# Load programming knowledge base
self.programming_data = self.load_programming_data()
self.knowledge_base = self.prepare_knowledge_base()
# Initialize embedding model for semantic search
self.init_embedding_model()
# Feature flags (env driven)
self.use_local_llm = os.getenv("USE_LOCAL_LLM", "0") == "1"
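        # Heavy local model loading stays opt-in (USE_LOCAL_LLM=1); by default the
        # assistant answers from the knowledge base and template responses only.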
def load_programming_data(self) -> Dict:
"""Load programming knowledge from JSON file"""
try:
json_path = os.path.join(os.path.dirname(__file__), 'programming_data.json')
with open(json_path, 'r', encoding='utf-8') as file:
return json.load(file)
except FileNotFoundError:
print("Programming data file not found. Using basic data.")
return self.get_fallback_data()
except json.JSONDecodeError:
print("Error reading programming data. Using basic data.")
return self.get_fallback_data()
def get_fallback_data(self) -> Dict:
"""Fallback data if JSON file is not available"""
return {
"languages": {
"Python": {
"paradigm": ["Object-oriented", "Imperative", "Functional", "Procedural"],
"typing": "Dynamic",
"use_cases": ["Web development", "Data science", "AI/ML", "Automation"],
"common_errors": [
{"name": "IndentationError", "solution": "Ensure consistent use of tabs or spaces"},
{"name": "NameError", "solution": "Check if variable is defined before use"}
],
"optimization": ["Use list comprehensions", "Avoid global variables", "Use built-in functions"]
},
"JavaScript": {
"paradigm": ["Event-driven", "Functional", "Object-oriented"],
"typing": "Dynamic",
"use_cases": ["Web development", "Frontend", "Backend", "Mobile apps"],
"common_errors": [
{"name": "TypeError", "solution": "Check variable types before operations"},
{"name": "ReferenceError", "solution": "Ensure variables/functions are in scope"}
],
"optimization": ["Minimize DOM access", "Debounce events", "Use Web Workers"]
}
},
"concepts": {
"OOP": {
"definition": "Object-oriented programming organizes software design around objects rather than functions and logic",
"principles": ["Encapsulation", "Inheritance", "Polymorphism", "Abstraction"]
},
"Functional Programming": {
"definition": "Programming paradigm that treats computation as evaluation of mathematical functions",
"key_features": ["Pure functions", "Immutability", "First-class functions"]
}
}
}
def prepare_knowledge_base(self) -> List[Dict]:
"""Prepare searchable knowledge base from programming data"""
knowledge_items = []
# Process languages data
for lang_name, lang_data in self.programming_data.get('languages', {}).items():
# Basic language info
knowledge_items.append({
'type': 'language_info',
'language': lang_name,
'content': f"{lang_name} programming language: Paradigms - {', '.join(lang_data.get('paradigm', []))}, "
f"Typing - {lang_data.get('typing', 'N/A')}, "
f"Use cases - {', '.join(lang_data.get('use_cases', []))}",
'data': lang_data
})
# Common errors
for error in lang_data.get('common_errors', []):
knowledge_items.append({
'type': 'error',
'language': lang_name,
'content': f"{error.get('name', 'Unknown')} in {lang_name}: "
f"Solution - {error.get('solution', 'N/A')}",
'data': error
})
# Optimization tips
for tip in lang_data.get('optimization', []):
knowledge_items.append({
'type': 'optimization',
'language': lang_name,
'content': f"Optimization tip for {lang_name}: {tip}",
'data': tip
})
# Process programming concepts
for concept_name, concept_data in self.programming_data.get('concepts', {}).items():
knowledge_items.append({
'type': 'concept',
'content': f"{concept_name}: {concept_data.get('definition', 'N/A')}. "
f"Key aspects: {', '.join(concept_data.get('principles', concept_data.get('key_features', [])))}",
'data': concept_data
})
return knowledge_items
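    # Each knowledge item exposes a 'type', a human-readable 'content' string used
    # for retrieval, the underlying 'data', and, for language-specific entries, a
    # 'language' field.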
def init_embedding_model(self):
"""Initialize embedding model for semantic search"""
if SENTENCE_TRANSFORMERS_AVAILABLE:
try:
self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
                # Pre-compute embeddings for the knowledge base; normalizing them
                # makes the dot products in semantic_search true cosine similarities.
                self.knowledge_embeddings = self.embedding_model.encode(
                    [item['content'] for item in self.knowledge_base],
                    normalize_embeddings=True
                )
except Exception as e:
print(f"Failed to load embedding model: {e}")
self.embedding_model = None
else:
self.embedding_model = None
def semantic_search(self, query: str, top_k: int = 3) -> List[Dict]:
"""Perform semantic search on knowledge base"""
if self.embedding_model is None:
return self.fallback_search(query, top_k)
try:
            query_embedding = self.embedding_model.encode([query], normalize_embeddings=True)
similarities = np.dot(query_embedding, self.knowledge_embeddings.T)[0]
top_indices = np.argsort(similarities)[-top_k:][::-1]
results = []
for idx in top_indices:
if similarities[idx] > 0.3: # Threshold for relevance
results.append({
'item': self.knowledge_base[idx],
'score': float(similarities[idx])
})
return results
except Exception as e:
print(f"Semantic search error: {e}")
return self.fallback_search(query, top_k)
def fallback_search(self, query: str, top_k: int = 3) -> List[Dict]:
"""Fallback search using keyword matching"""
query_words = set(query.lower().split())
results = []
for item in self.knowledge_base:
content_words = set(item['content'].lower().split())
overlap = len(query_words.intersection(content_words))
if overlap > 0:
results.append({
'item': item,
'score': overlap / len(query_words)
})
results.sort(key=lambda x: x['score'], reverse=True)
return results[:top_k]
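    # Illustrative usage (assuming the data file is present):
    #   CodeGenius().semantic_search("fix IndentationError in Python")
    # returns up to top_k dicts of the form {'item': <knowledge item>, 'score': <relevance>}.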
def load_model(self):
"""Load AI model for advanced queries"""
if self.model_loaded:
return True
# Only attempt heavy model if explicitly enabled
if TRANSFORMERS_AVAILABLE and self.use_local_llm:
try:
# Use a code-specific model
model_name = "bigcode/starcoder2-7b"
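                # Note: this is a 7B-parameter model; loading it locally realistically
                # requires a GPU (or substantial RAM), even with float16 weights.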
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
self.model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
device_map="auto" if torch.cuda.is_available() else None,
low_cpu_mem_usage=True
)
# Add pad token if not present
if self.tokenizer.pad_token is None:
self.tokenizer.pad_token = self.tokenizer.eos_token
                # Device placement is handled by device_map when a GPU is available,
                # so `device` is not passed here (recent transformers versions reject
                # specifying both for an accelerate-loaded model).
                self.generator = pipeline(
                    "text-generation",
                    model=self.model,
                    tokenizer=self.tokenizer,
                    return_full_text=False
                )
self.model_loaded = True
print("✅ AI model loaded successfully!")
return True
except Exception as e:
print(f"⚠️ Could not load AI model: {str(e)}")
return False
else:
if not TRANSFORMERS_AVAILABLE and self.use_local_llm:
print("🔧 Install transformers and torch for AI features")
return False
def generate_ai_response(self, query: str, context: str = "", code: str = "") -> str:
"""Generate conversational AI response using programming knowledge"""
if not self.model_loaded:
if not self.load_model():
return self.generate_openai_style_response(query, context, code)
try:
# Create a conversational prompt for code assistance
system_prompt = """You are an expert programming assistant with years of experience helping developers.
Your job is to provide helpful, accurate code solutions, explanations, and optimizations.
Provide clear, concise answers with code examples when appropriate.
Explain complex concepts in simple terms and always consider best practices."""
user_prompt = f"""Based on this programming knowledge: {context}
And this provided code: {code}
Please answer this developer's question: {query}
Provide the best solution with explanation and consider edge cases."""
# Generate response
full_prompt = f"{system_prompt}\n\nUser: {user_prompt}\nAssistant:"
response = self.generator(
full_prompt,
max_new_tokens=300,
do_sample=True,
temperature=0.7,
top_p=0.9,
pad_token_id=self.tokenizer.eos_token_id,
repetition_penalty=1.1,
no_repeat_ngram_size=3
)
            if response and len(response) > 0:
                generated_text = response[0]["generated_text"]
                # With return_full_text=False the prompt is not echoed back, so the
                # "Assistant:" marker may or may not appear in the generated text.
                if "Assistant:" in generated_text:
                    generated_text = generated_text.split("Assistant:")[-1]
                ai_response = generated_text.strip()
                if len(ai_response) > 20:
                    return ai_response
except Exception as e:
print(f"AI generation error: {e}")
# Fallback to OpenAI-style response
return self.generate_openai_style_response(query, context, code)
def generate_openai_style_response(self, query: str, context: str, code: str) -> str:
"""Generate OpenAI-style conversational response using template"""
query_lower = query.lower()
# Extract key information from context
lang_mentioned = None
for lang in ['python', 'javascript', 'java', 'c++', 'go']:
if lang in query_lower or lang in context.lower():
lang_mentioned = lang
break
        if lang_mentioned:
            # Look up the knowledge-base key case-insensitively so that e.g.
            # "javascript" resolves to the "JavaScript" entry.
            languages = self.programming_data.get('languages', {})
            lang_key = next(
                (name for name in languages if name.lower() == lang_mentioned),
                lang_mentioned.capitalize()
            )
            lang_data = languages.get(lang_key, {})
if 'error' in query_lower or 'bug' in query_lower or 'fix' in query_lower:
return self.generate_error_response(lang_mentioned, lang_data, query, code)
elif 'optimiz' in query_lower or 'improve' in query_lower or 'speed' in query_lower:
return self.generate_optimization_response(lang_mentioned, lang_data, code)
elif 'explain' in query_lower or 'how does' in query_lower:
return self.generate_explanation_response(lang_mentioned, lang_data, code)
elif 'generate' in query_lower or 'write' in query_lower or 'create' in query_lower:
return self.generate_code_response(lang_mentioned, lang_data, query)
else:
return self.generate_general_lang_response(lang_mentioned, lang_data, query)
return self.generate_general_programming_response(query, context, code)
def generate_error_response(self, lang: str, lang_data: dict, query: str, code: str) -> str:
"""Generate detailed error explanation and solution"""
common_errors = lang_data.get('common_errors', [])
bullets = ", ".join([e.get('name', 'Unknown') for e in common_errors[:5]]) or "syntax and runtime issues"
steps = [
"Reproduce the error and capture the full traceback/message",
"Locate the failing line and inspect variables/inputs",
"Minimize to a small reproducible example",
"Apply a fix, then add/adjust a test to prevent regressions",
]
suggestions = [f"{e.get('name', 'Error')}: {e.get('solution', '')}" for e in common_errors[:5]]
response = (
f"Debugging {lang}:\n"
f"Common issues: {bullets}.\n\n"
f"Code (context):\n{(code or '# no code provided').strip()}\n\n"
f"Steps:\n- " + "\n- ".join(steps) + "\n\n"
+ ("Hints:\n- " + "\n- ".join(suggestions) if suggestions else "")
)
return response
def generate_optimization_response(self, lang: str, lang_data: dict, code: str) -> str:
tips = lang_data.get('optimization', [])
generic = [
"Profile first; optimize hot paths, not guesses",
"Prefer algorithms/data structures with better complexity",
"Avoid unnecessary allocations and copies",
"Cache expensive results where safe",
]
body = (
f"Performance tips for {lang}:\n- " + "\n- ".join(tips + generic[: max(0, 4 - len(tips))]) +
(f"\n\nCode (context):\n{code.strip()}" if code else "")
)
return body
def generate_explanation_response(self, lang: str, lang_data: dict, code: str) -> str:
if not code:
return (
f"Explain {lang} code: provide the snippet for a targeted walkthrough.\n"
f"Meanwhile, key {lang} concepts: paradigms={', '.join(lang_data.get('paradigm', []))}, typing={lang_data.get('typing', 'n/a')}."
)
outline = [
"High-level: What does this code do?",
"Inputs/outputs: parameters, return values, side effects",
"Control flow: loops, branches, error handling",
"Data structures and complexity",
]
return (
f"Explanation ({lang}):\n"
f"Code:\n{code.strip()}\n\n"
f"Consider:\n- " + "\n- ".join(outline)
)
def generate_code_response(self, lang: str, lang_data: dict, query: str) -> str:
# Provide a minimal idiomatic template per language
templates = {
'python': (
"# minimal CLI template\n"
"import sys\n\n"
"def main(argv: list[str]) -> int:\n"
" # TODO: implement\n"
" print('Hello from CodeGenius')\n"
" return 0\n\n"
"if __name__ == '__main__':\n"
" raise SystemExit(main(sys.argv[1:]))\n"
),
'javascript': (
"// minimal Node.js module template\n"
"export function main(args = []) {\n"
" console.log('Hello from CodeGenius');\n"
"}\n"
),
'java': (
"// minimal Java app template\n"
"public class App {\n"
" public static void main(String[] args) {\n"
" System.out.println(\"Hello from CodeGenius\");\n"
" }\n"
"}\n"
)
}
key = lang.lower()
snippet = templates.get(key, "// Provide more detail to generate specific code.")
return f"Generated starter for {lang}:\n{snippet}"
def generate_general_lang_response(self, lang: str, lang_data: dict, query: str) -> str:
paradigms = ', '.join(lang_data.get('paradigm', []))
use_cases = ', '.join(lang_data.get('use_cases', []))
typing = lang_data.get('typing', 'n/a')
pitfalls = ', '.join([e.get('name', '') for e in lang_data.get('common_errors', [])[:5]])
return (
f"{lang.capitalize()} overview: paradigms={paradigms}; typing={typing}; typical uses={use_cases}.\n"
f"Watch for: {pitfalls}.\n"
f"Query: {query}"
)
def generate_general_programming_response(self, query: str, context: str, code: str) -> str:
parts = []
if context:
parts.append(f"Relevant knowledge: {context}")
if code:
parts.append(f"Code context:\n{code.strip()}")
parts.append(
"Approach: clarify requirements, choose data structures, write small tests, implement incrementally, and profile if performance matters."
)
return f"Answering: {query}\n" + "\n\n".join(parts)
def answer(self, query: str, code: str = "") -> str:
"""Top-level entry: perform semantic search, then answer."""
# Build context from semantic search
top = self.semantic_search(query, top_k=3)
context_str = " | ".join([t['item']['content'] for t in top]) if top else ""
# Use template or local LLM if enabled
return self.generate_ai_response(query, context_str, code)
# -------- Simple UI / Entrypoint --------
def _build_gradio_ui(genius: CodeGenius):
with gr.Blocks(title="CodeGenius") as demo:
gr.Markdown("# CodeGenius\nAn AI-powered programming helper (lightweight mode by default).")
chatbot = gr.Chatbot(height=350)
with gr.Row():
msg = gr.Textbox(label="Ask a question", scale=3)
code_in = gr.Textbox(label="Optional code context", lines=8)
clear = gr.Button("Clear")
        def respond(user_message, chat_history, code_text):
            # Append the new exchange and clear the question box after each submit.
            if not user_message:
                return "", chat_history or []
            reply = genius.answer(user_message, code_text or "")
            chat_history = (chat_history or []) + [[user_message, reply]]
            return "", chat_history
        msg.submit(respond, [msg, chatbot, code_in], [msg, chatbot])
        clear.click(lambda: [], None, chatbot, queue=False)
return demo
def main():
genius = CodeGenius()
if os.getenv("RUN_UI", "0") == "1":
demo = _build_gradio_ui(genius)
demo.launch(server_name="127.0.0.1", server_port=int(os.getenv("PORT", "7860")))
return
# CLI mode
print("CodeGenius (CLI). Type 'exit' to quit.")
while True:
try:
q = input("You> ").strip()
except (EOFError, KeyboardInterrupt):
print()
break
if q.lower() in {"exit", "quit"}:
break
ans = genius.answer(q)
print(f"Bot> {ans}\n")
if __name__ == "__main__":
main()