Spaces:
Sleeping
Sleeping
import re
import random
import gradio as gr
import json
import os
from typing import Dict, List, Any

# Try to import AI libraries.
# Optional dependency: OpenAI client (only the availability flag is set here).
try:
    import openai
    OPENAI_AVAILABLE = True
except ImportError:
    OPENAI_AVAILABLE = False

# Optional dependency: transformers + torch for local text generation.
try:
    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
    import torch
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    TRANSFORMERS_AVAILABLE = False

# Try to import sentence transformers for semantic search (numpy comes with it).
try:
    from sentence_transformers import SentenceTransformer
    import numpy as np
    SENTENCE_TRANSFORMERS_AVAILABLE = True
except ImportError:
    SENTENCE_TRANSFORMERS_AVAILABLE = False
| class CodeGenius: | |
| def __init__(self): | |
| self.name = "CodeGenius" | |
| self.user_name = "" | |
| self.conversation_history = [] | |
| self.model_loaded = False | |
| self.generator = None | |
| self.tokenizer = None | |
| self.model = None | |
| self.embedding_model = None | |
| # Load programming knowledge base | |
| self.programming_data = self.load_programming_data() | |
| self.knowledge_base = self.prepare_knowledge_base() | |
| # Initialize embedding model for semantic search | |
| self.init_embedding_model() | |
| def load_programming_data(self) -> Dict: | |
| """Load programming knowledge from JSON file""" | |
| try: | |
| json_path = os.path.join(os.path.dirname(__file__), 'programming_data.json') | |
| with open(json_path, 'r', encoding='utf-8') as file: | |
| return json.load(file) | |
| except FileNotFoundError: | |
| print("Programming data file not found. Using basic data.") | |
| return self.get_fallback_data() | |
| except json.JSONDecodeError: | |
| print("Error reading programming data. Using basic data.") | |
| return self.get_fallback_data() | |
| def get_fallback_data(self) -> Dict: | |
| """Fallback data if JSON file is not available""" | |
| return { | |
| "languages": { | |
| "Python": { | |
| "paradigm": ["Object-oriented", "Imperative", "Functional", "Procedural"], | |
| "typing": "Dynamic", | |
| "use_cases": ["Web development", "Data science", "AI/ML", "Automation"], | |
| "common_errors": [ | |
| {"name": "IndentationError", "solution": "Ensure consistent use of tabs or spaces"}, | |
| {"name": "NameError", "solution": "Check if variable is defined before use"} | |
| ], | |
| "optimization": ["Use list comprehensions", "Avoid global variables", "Use built-in functions"] | |
| }, | |
| "JavaScript": { | |
| "paradigm": ["Event-driven", "Functional", "Object-oriented"], | |
| "typing": "Dynamic", | |
| "use_cases": ["Web development", "Frontend", "Backend", "Mobile apps"], | |
| "common_errors": [ | |
| {"name": "TypeError", "solution": "Check variable types before operations"}, | |
| {"name": "ReferenceError", "solution": "Ensure variables/functions are in scope"} | |
| ], | |
| "optimization": ["Minimize DOM access", "Debounce events", "Use Web Workers"] | |
| } | |
| }, | |
| "concepts": { | |
| "OOP": { | |
| "definition": "Object-oriented programming organizes software design around objects rather than functions and logic", | |
| "principles": ["Encapsulation", "Inheritance", "Polymorphism", "Abstraction"] | |
| }, | |
| "Functional Programming": { | |
| "definition": "Programming paradigm that treats computation as evaluation of mathematical functions", | |
| "key_features": ["Pure functions", "Immutability", "First-class functions"] | |
| } | |
| } | |
| } | |
| def prepare_knowledge_base(self) -> List[Dict]: | |
| """Prepare searchable knowledge base from programming data""" | |
| knowledge_items = [] | |
| # Process languages data | |
| for lang_name, lang_data in self.programming_data.get('languages', {}).items(): | |
| # Basic language info | |
| knowledge_items.append({ | |
| 'type': 'language_info', | |
| 'language': lang_name, | |
| 'content': f"{lang_name} programming language: Paradigms - {', '.join(lang_data.get('paradigm', []))}, " | |
| f"Typing - {lang_data.get('typing', 'N/A')}, " | |
| f"Use cases - {', '.join(lang_data.get('use_cases', []))}", | |
| 'data': lang_data | |
| }) | |
| # Common errors | |
| for error in lang_data.get('common_errors', []): | |
| knowledge_items.append({ | |
| 'type': 'error', | |
| 'language': lang_name, | |
| 'content': f"{error.get('name', 'Unknown')} in {lang_name}: " | |
| f"Solution - {error.get('solution', 'N/A')}", | |
| 'data': error | |
| }) | |
| # Optimization tips | |
| for tip in lang_data.get('optimization', []): | |
| knowledge_items.append({ | |
| 'type': 'optimization', | |
| 'language': lang_name, | |
| 'content': f"Optimization tip for {lang_name}: {tip}", | |
| 'data': tip | |
| }) | |
| # Process programming concepts | |
| for concept_name, concept_data in self.programming_data.get('concepts', {}).items(): | |
| knowledge_items.append({ | |
| 'type': 'concept', | |
| 'content': f"{concept_name}: {concept_data.get('definition', 'N/A')}. " | |
| f"Key aspects: {', '.join(concept_data.get('principles', concept_data.get('key_features', [])))}", | |
| 'data': concept_data | |
| }) | |
| return knowledge_items | |
| def init_embedding_model(self): | |
| """Initialize embedding model for semantic search""" | |
| if SENTENCE_TRANSFORMERS_AVAILABLE: | |
| try: | |
| self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2') | |
| # Pre-compute embeddings for knowledge base | |
| self.knowledge_embeddings = self.embedding_model.encode([item['content'] for item in self.knowledge_base]) | |
| except Exception as e: | |
| print(f"Failed to load embedding model: {e}") | |
| self.embedding_model = None | |
| else: | |
| self.embedding_model = None | |
| def semantic_search(self, query: str, top_k: int = 3) -> List[Dict]: | |
| """Perform semantic search on knowledge base""" | |
| if self.embedding_model is None: | |
| return self.fallback_search(query, top_k) | |
| try: | |
| query_embedding = self.embedding_model.encode([query]) | |
| similarities = np.dot(query_embedding, self.knowledge_embeddings.T)[0] | |
| top_indices = np.argsort(similarities)[-top_k:][::-1] | |
| results = [] | |
| for idx in top_indices: | |
| if similarities[idx] > 0.3: # Threshold for relevance | |
| results.append({ | |
| 'item': self.knowledge_base[idx], | |
| 'score': float(similarities[idx]) | |
| }) | |
| return results | |
| except Exception as e: | |
| print(f"Semantic search error: {e}") | |
| return self.fallback_search(query, top_k) | |
| def fallback_search(self, query: str, top_k: int = 3) -> List[Dict]: | |
| """Fallback search using keyword matching""" | |
| query_words = set(query.lower().split()) | |
| results = [] | |
| for item in self.knowledge_base: | |
| content_words = set(item['content'].lower().split()) | |
| overlap = len(query_words.intersection(content_words)) | |
| if overlap > 0: | |
| results.append({ | |
| 'item': item, | |
| 'score': overlap / len(query_words) | |
| }) | |
| results.sort(key=lambda x: x['score'], reverse=True) | |
| return results[:top_k] | |
| def load_model(self): | |
| """Load AI model for advanced queries""" | |
| if self.model_loaded: | |
| return True | |
| if TRANSFORMERS_AVAILABLE: | |
| try: | |
| # Use a code-specific model | |
| model_name = "bigcode/starcoder2-7b" | |
| self.tokenizer = AutoTokenizer.from_pretrained(model_name) | |
| self.model = AutoModelForCausalLM.from_pretrained( | |
| model_name, | |
| torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, | |
| device_map="auto" if torch.cuda.is_available() else None, | |
| low_cpu_mem_usage=True | |
| ) | |
| # Add pad token if not present | |
| if self.tokenizer.pad_token is None: | |
| self.tokenizer.pad_token = self.tokenizer.eos_token | |
| self.generator = pipeline( | |
| "text-generation", | |
| model=self.model, | |
| tokenizer=self.tokenizer, | |
| device=0 if torch.cuda.is_available() else -1, | |
| return_full_text=False | |
| ) | |
| self.model_loaded = True | |
| print("โ AI model loaded successfully!") | |
| return True | |
| except Exception as e: | |
| print(f"โ ๏ธ Could not load AI model: {str(e)}") | |
| return False | |
| else: | |
| print("๐ง Install transformers and torch for AI features") | |
| return False | |
| def generate_ai_response(self, query: str, context: str = "", code: str = "") -> str: | |
| """Generate conversational AI response using programming knowledge""" | |
| if not self.model_loaded: | |
| if not self.load_model(): | |
| return self.generate_openai_style_response(query, context, code) | |
| try: | |
| # Create a conversational prompt for code assistance | |
| system_prompt = """You are an expert programming assistant with years of experience helping developers. | |
| Your job is to provide helpful, accurate code solutions, explanations, and optimizations. | |
| Provide clear, concise answers with code examples when appropriate. | |
| Explain complex concepts in simple terms and always consider best practices.""" | |
| user_prompt = f"""Based on this programming knowledge: {context} | |
| And this provided code: {code} | |
| Please answer this developer's question: {query} | |
| Provide the best solution with explanation and consider edge cases.""" | |
| # Generate response | |
| full_prompt = f"{system_prompt}\n\nUser: {user_prompt}\nAssistant:" | |
| response = self.generator( | |
| full_prompt, | |
| max_new_tokens=300, | |
| do_sample=True, | |
| temperature=0.7, | |
| top_p=0.9, | |
| pad_token_id=self.tokenizer.eos_token_id, | |
| repetition_penalty=1.1, | |
| no_repeat_ngram_size=3 | |
| ) | |
| if response and len(response) > 0: | |
| generated_text = response[0]["generated_text"] | |
| # Extract only the assistant's response | |
| if "Assistant:" in generated_text: | |
| ai_response = generated_text.split("Assistant:")[-1].strip() | |
| if len(ai_response) > 20: | |
| return ai_response | |
| except Exception as e: | |
| print(f"AI generation error: {e}") | |
| # Fallback to OpenAI-style response | |
| return self.generate_openai_style_response(query, context, code) | |
| def generate_openai_style_response(self, query: str, context: str, code: str) -> str: | |
| """Generate OpenAI-style conversational response using template""" | |
| query_lower = query.lower() | |
| # Extract key information from context | |
| lang_mentioned = None | |
| for lang in ['python', 'javascript', 'java', 'c++', 'go']: | |
| if lang in query_lower or lang in context.lower(): | |
| lang_mentioned = lang | |
| break | |
| if lang_mentioned: | |
| lang_data = self.programming_data.get('languages', {}).get(lang_mentioned.capitalize(), {}) | |
| if 'error' in query_lower or 'bug' in query_lower or 'fix' in query_lower: | |
| return self.generate_error_response(lang_mentioned, lang_data, query, code) | |
| elif 'optimiz' in query_lower or 'improve' in query_lower or 'speed' in query_lower: | |
| return self.generate_optimization_response(lang_mentioned, lang_data, code) | |
| elif 'explain' in query_lower or 'how does' in query_lower: | |
| return self.generate_explanation_response(lang_mentioned, lang_data, code) | |
| elif 'generate' in query_lower or 'write' in query_lower or 'create' in query_lower: | |
| return self.generate_code_response(lang_mentioned, lang_data, query) | |
| else: | |
| return self.generate_general_lang_response(lang_mentioned, lang_data, query) | |
| return self.generate_general_programming_response(query, context, code) | |
| def generate_error_response(self, lang: str, lang_data: dict, query: str, code: str) -> str: | |
| """Generate detailed error explanation and solution""" | |
| common_errors = lang_data.get('common_errors', []) | |
| response = f"""Let me help you debug this {lang} code. Here's a systematic approach: | |
| ๐ **Error Analysis:** | |
| First, let's identify the error type and root cause. Common {lang} errors include: | |
| {', '.join([e.get('name', 'Unknown') for e in common_errors[:3]])} | |
| ๐ป **Code Inspection:** | |
| For the provided code: | |
| ```{lang} | |
| {code if code else '# No code provided'} |