CodeBlesser / app.py
hari7261's picture
Update app.py
61b7033 verified
raw
history blame
14.1 kB
import re
import random
import gradio as gr
import json
import os
from typing import Dict, List, Any
# Try to import AI libraries
try:
import openai
OPENAI_AVAILABLE = True
except ImportError:
OPENAI_AVAILABLE = False
try:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
TRANSFORMERS_AVAILABLE = True
except ImportError:
TRANSFORMERS_AVAILABLE = False
# Try to import sentence transformers for semantic search
try:
from sentence_transformers import SentenceTransformer
import numpy as np
SENTENCE_TRANSFORMERS_AVAILABLE = True
except ImportError:
SENTENCE_TRANSFORMERS_AVAILABLE = False
class CodeGenius:
    """Programming-assistant chatbot backed by a JSON knowledge base,
    optional transformer models, and embedding-based semantic search."""

    def __init__(self):
        """Initialize default state, load the knowledge base, and set up
        the optional embedding model used for semantic search."""
        # Identity / session state
        self.name = "CodeGenius"
        self.user_name = ""
        self.conversation_history = []
        # Generation-model handles; populated lazily by load_model()
        self.model_loaded = False
        self.generator = None
        self.tokenizer = None
        self.model = None
        self.embedding_model = None
        # Knowledge base: raw JSON data plus a flattened searchable form
        self.programming_data = self.load_programming_data()
        self.knowledge_base = self.prepare_knowledge_base()
        # Pre-compute embeddings for retrieval (optional dependency)
        self.init_embedding_model()
def load_programming_data(self) -> Dict:
"""Load programming knowledge from JSON file"""
try:
json_path = os.path.join(os.path.dirname(__file__), 'programming_data.json')
with open(json_path, 'r', encoding='utf-8') as file:
return json.load(file)
except FileNotFoundError:
print("Programming data file not found. Using basic data.")
return self.get_fallback_data()
except json.JSONDecodeError:
print("Error reading programming data. Using basic data.")
return self.get_fallback_data()
def get_fallback_data(self) -> Dict:
"""Fallback data if JSON file is not available"""
return {
"languages": {
"Python": {
"paradigm": ["Object-oriented", "Imperative", "Functional", "Procedural"],
"typing": "Dynamic",
"use_cases": ["Web development", "Data science", "AI/ML", "Automation"],
"common_errors": [
{"name": "IndentationError", "solution": "Ensure consistent use of tabs or spaces"},
{"name": "NameError", "solution": "Check if variable is defined before use"}
],
"optimization": ["Use list comprehensions", "Avoid global variables", "Use built-in functions"]
},
"JavaScript": {
"paradigm": ["Event-driven", "Functional", "Object-oriented"],
"typing": "Dynamic",
"use_cases": ["Web development", "Frontend", "Backend", "Mobile apps"],
"common_errors": [
{"name": "TypeError", "solution": "Check variable types before operations"},
{"name": "ReferenceError", "solution": "Ensure variables/functions are in scope"}
],
"optimization": ["Minimize DOM access", "Debounce events", "Use Web Workers"]
}
},
"concepts": {
"OOP": {
"definition": "Object-oriented programming organizes software design around objects rather than functions and logic",
"principles": ["Encapsulation", "Inheritance", "Polymorphism", "Abstraction"]
},
"Functional Programming": {
"definition": "Programming paradigm that treats computation as evaluation of mathematical functions",
"key_features": ["Pure functions", "Immutability", "First-class functions"]
}
}
}
def prepare_knowledge_base(self) -> List[Dict]:
"""Prepare searchable knowledge base from programming data"""
knowledge_items = []
# Process languages data
for lang_name, lang_data in self.programming_data.get('languages', {}).items():
# Basic language info
knowledge_items.append({
'type': 'language_info',
'language': lang_name,
'content': f"{lang_name} programming language: Paradigms - {', '.join(lang_data.get('paradigm', []))}, "
f"Typing - {lang_data.get('typing', 'N/A')}, "
f"Use cases - {', '.join(lang_data.get('use_cases', []))}",
'data': lang_data
})
# Common errors
for error in lang_data.get('common_errors', []):
knowledge_items.append({
'type': 'error',
'language': lang_name,
'content': f"{error.get('name', 'Unknown')} in {lang_name}: "
f"Solution - {error.get('solution', 'N/A')}",
'data': error
})
# Optimization tips
for tip in lang_data.get('optimization', []):
knowledge_items.append({
'type': 'optimization',
'language': lang_name,
'content': f"Optimization tip for {lang_name}: {tip}",
'data': tip
})
# Process programming concepts
for concept_name, concept_data in self.programming_data.get('concepts', {}).items():
knowledge_items.append({
'type': 'concept',
'content': f"{concept_name}: {concept_data.get('definition', 'N/A')}. "
f"Key aspects: {', '.join(concept_data.get('principles', concept_data.get('key_features', [])))}",
'data': concept_data
})
return knowledge_items
def init_embedding_model(self):
"""Initialize embedding model for semantic search"""
if SENTENCE_TRANSFORMERS_AVAILABLE:
try:
self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Pre-compute embeddings for knowledge base
self.knowledge_embeddings = self.embedding_model.encode([item['content'] for item in self.knowledge_base])
except Exception as e:
print(f"Failed to load embedding model: {e}")
self.embedding_model = None
else:
self.embedding_model = None
def semantic_search(self, query: str, top_k: int = 3) -> List[Dict]:
"""Perform semantic search on knowledge base"""
if self.embedding_model is None:
return self.fallback_search(query, top_k)
try:
query_embedding = self.embedding_model.encode([query])
similarities = np.dot(query_embedding, self.knowledge_embeddings.T)[0]
top_indices = np.argsort(similarities)[-top_k:][::-1]
results = []
for idx in top_indices:
if similarities[idx] > 0.3: # Threshold for relevance
results.append({
'item': self.knowledge_base[idx],
'score': float(similarities[idx])
})
return results
except Exception as e:
print(f"Semantic search error: {e}")
return self.fallback_search(query, top_k)
def fallback_search(self, query: str, top_k: int = 3) -> List[Dict]:
"""Fallback search using keyword matching"""
query_words = set(query.lower().split())
results = []
for item in self.knowledge_base:
content_words = set(item['content'].lower().split())
overlap = len(query_words.intersection(content_words))
if overlap > 0:
results.append({
'item': item,
'score': overlap / len(query_words)
})
results.sort(key=lambda x: x['score'], reverse=True)
return results[:top_k]
def load_model(self):
"""Load AI model for advanced queries"""
if self.model_loaded:
return True
if TRANSFORMERS_AVAILABLE:
try:
# Use a code-specific model
model_name = "bigcode/starcoder2-7b"
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
self.model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
device_map="auto" if torch.cuda.is_available() else None,
low_cpu_mem_usage=True
)
# Add pad token if not present
if self.tokenizer.pad_token is None:
self.tokenizer.pad_token = self.tokenizer.eos_token
self.generator = pipeline(
"text-generation",
model=self.model,
tokenizer=self.tokenizer,
device=0 if torch.cuda.is_available() else -1,
return_full_text=False
)
self.model_loaded = True
print("โœ… AI model loaded successfully!")
return True
except Exception as e:
print(f"โš ๏ธ Could not load AI model: {str(e)}")
return False
else:
print("๐Ÿ”ง Install transformers and torch for AI features")
return False
def generate_ai_response(self, query: str, context: str = "", code: str = "") -> str:
"""Generate conversational AI response using programming knowledge"""
if not self.model_loaded:
if not self.load_model():
return self.generate_openai_style_response(query, context, code)
try:
# Create a conversational prompt for code assistance
system_prompt = """You are an expert programming assistant with years of experience helping developers.
Your job is to provide helpful, accurate code solutions, explanations, and optimizations.
Provide clear, concise answers with code examples when appropriate.
Explain complex concepts in simple terms and always consider best practices."""
user_prompt = f"""Based on this programming knowledge: {context}
And this provided code: {code}
Please answer this developer's question: {query}
Provide the best solution with explanation and consider edge cases."""
# Generate response
full_prompt = f"{system_prompt}\n\nUser: {user_prompt}\nAssistant:"
response = self.generator(
full_prompt,
max_new_tokens=300,
do_sample=True,
temperature=0.7,
top_p=0.9,
pad_token_id=self.tokenizer.eos_token_id,
repetition_penalty=1.1,
no_repeat_ngram_size=3
)
if response and len(response) > 0:
generated_text = response[0]["generated_text"]
# Extract only the assistant's response
if "Assistant:" in generated_text:
ai_response = generated_text.split("Assistant:")[-1].strip()
if len(ai_response) > 20:
return ai_response
except Exception as e:
print(f"AI generation error: {e}")
# Fallback to OpenAI-style response
return self.generate_openai_style_response(query, context, code)
def generate_openai_style_response(self, query: str, context: str, code: str) -> str:
"""Generate OpenAI-style conversational response using template"""
query_lower = query.lower()
# Extract key information from context
lang_mentioned = None
for lang in ['python', 'javascript', 'java', 'c++', 'go']:
if lang in query_lower or lang in context.lower():
lang_mentioned = lang
break
if lang_mentioned:
lang_data = self.programming_data.get('languages', {}).get(lang_mentioned.capitalize(), {})
if 'error' in query_lower or 'bug' in query_lower or 'fix' in query_lower:
return self.generate_error_response(lang_mentioned, lang_data, query, code)
elif 'optimiz' in query_lower or 'improve' in query_lower or 'speed' in query_lower:
return self.generate_optimization_response(lang_mentioned, lang_data, code)
elif 'explain' in query_lower or 'how does' in query_lower:
return self.generate_explanation_response(lang_mentioned, lang_data, code)
elif 'generate' in query_lower or 'write' in query_lower or 'create' in query_lower:
return self.generate_code_response(lang_mentioned, lang_data, query)
else:
return self.generate_general_lang_response(lang_mentioned, lang_data, query)
return self.generate_general_programming_response(query, context, code)
def generate_error_response(self, lang: str, lang_data: dict, query: str, code: str) -> str:
"""Generate detailed error explanation and solution"""
common_errors = lang_data.get('common_errors', [])
response = f"""Let me help you debug this {lang} code. Here's a systematic approach:
๐Ÿ” **Error Analysis:**
First, let's identify the error type and root cause. Common {lang} errors include:
{', '.join([e.get('name', 'Unknown') for e in common_errors[:3]])}
๐Ÿ’ป **Code Inspection:**
For the provided code:
```{lang}
{code if code else '# No code provided'}