lakkiroy committed on
Commit ·
9ed6f59
1
Parent(s): 9fd15f6
Update git-chat application with improved LLM integration and configuration
Browse files- .env.example +8 -4
- README.md +9 -3
- app.py +3 -3
- config.py +4 -3
- requirements.txt +1 -1
- services/chat_service.py +73 -7
- services/simple_llm.py +199 -0
.env.example
CHANGED
|
@@ -1,14 +1,18 @@
|
|
| 1 |
-
#
|
| 2 |
-
|
| 3 |
|
| 4 |
# GitHub Token (optional, for private repos or better rate limits)
|
| 5 |
GITHUB_TOKEN=your_github_token_here
|
| 6 |
|
| 7 |
# LLM Provider Configuration
|
| 8 |
-
LLM_PROVIDER=
|
| 9 |
EMBEDDING_PROVIDER=sentence_transformers
|
| 10 |
|
| 11 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
HUGGINGFACE_MODEL=microsoft/DialoGPT-medium
|
| 13 |
|
| 14 |
# Embedding Model Configuration
|
|
|
|
| 1 |
+
# Groq API Key (recommended for fast and reliable LLM responses)
|
| 2 |
+
GROQ_API_KEY=your_groq_api_key_here
|
| 3 |
|
| 4 |
# GitHub Token (optional, for private repos or better rate limits)
|
| 5 |
GITHUB_TOKEN=your_github_token_here
|
| 6 |
|
| 7 |
# LLM Provider Configuration
|
| 8 |
+
LLM_PROVIDER=groq
|
| 9 |
EMBEDDING_PROVIDER=sentence_transformers
|
| 10 |
|
| 11 |
+
# Groq Configuration
|
| 12 |
+
GROQ_MODEL=deepseek-r1-distill-llama-70b
|
| 13 |
+
|
| 14 |
+
# Hugging Face API Token (optional fallback)
|
| 15 |
+
HUGGINGFACE_API_KEY=your_huggingface_token_here
|
| 16 |
HUGGINGFACE_MODEL=microsoft/DialoGPT-medium
|
| 17 |
|
| 18 |
# Embedding Model Configuration
|
README.md
CHANGED
|
@@ -42,10 +42,10 @@ A powerful AI-powered application that allows you to analyze any GitHub reposito
|
|
| 42 |
## 🛠️ Technology Stack
|
| 43 |
|
| 44 |
- **Frontend**: Gradio for the user interface
|
| 45 |
-
- **AI/ML**:
|
| 46 |
- **Vector Database**: ChromaDB for storing code embeddings
|
| 47 |
- **Code Processing**: GitPython for repository cloning
|
| 48 |
-
- **Language Models**:
|
| 49 |
|
| 50 |
## 📁 Supported File Types
|
| 51 |
|
|
@@ -56,7 +56,13 @@ The application processes the following file types:
|
|
| 56 |
|
| 57 |
## 🔧 Configuration
|
| 58 |
|
| 59 |
-
The app uses
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
## 📝 Usage Tips
|
| 62 |
|
|
|
|
| 42 |
## 🛠️ Technology Stack
|
| 43 |
|
| 44 |
- **Frontend**: Gradio for the user interface
|
| 45 |
+
- **AI/ML**: Groq API for fast LLM inference, Sentence Transformers for embeddings
|
| 46 |
- **Vector Database**: ChromaDB for storing code embeddings
|
| 47 |
- **Code Processing**: GitPython for repository cloning
|
| 48 |
+
- **Language Models**: Groq API with DeepSeek-R1-Distill-Llama-70B
|
| 49 |
|
| 50 |
## 📁 Supported File Types
|
| 51 |
|
|
|
|
| 56 |
|
| 57 |
## 🔧 Configuration
|
| 58 |
|
| 59 |
+
The app uses Groq's fast and reliable API for LLM inference and Sentence Transformers for embeddings. To get started:
|
| 60 |
+
|
| 61 |
+
1. Get a free API key from [Groq](https://console.groq.com/)
|
| 62 |
+
2. Set your API key in the environment variable `GROQ_API_KEY`
|
| 63 |
+
3. The app is pre-configured to use the powerful `deepseek-r1-distill-llama-70b` model
|
| 64 |
+
|
| 65 |
+
No additional setup required - just add your Groq API key!
|
| 66 |
|
| 67 |
## 📝 Usage Tips
|
| 68 |
|
app.py
CHANGED
|
@@ -25,9 +25,9 @@ embedding_service = FreeEmbeddingService(
|
|
| 25 |
|
| 26 |
# Initialize chat service for Hugging Face
|
| 27 |
chat_service = FreeChatService(
|
| 28 |
-
llm_provider=
|
| 29 |
-
api_key=
|
| 30 |
-
model=
|
| 31 |
)
|
| 32 |
|
| 33 |
file_processor = FileProcessor(settings.supported_extensions, settings.max_file_size)
|
|
|
|
| 25 |
|
| 26 |
# Initialize the chat service with the credentials that belong to the
# configured provider. Groq is the default provider, so handing it the
# Hugging Face key/model would make every request fail.
_llm_provider = settings.llm_provider.value
if _llm_provider == "groq":
    _llm_api_key = settings.groq_api_key
    _llm_model = settings.groq_model
else:
    # Other providers keep the previous behaviour (Hugging Face settings).
    _llm_api_key = settings.huggingface_api_key
    _llm_model = settings.huggingface_model

chat_service = FreeChatService(
    llm_provider=_llm_provider,
    api_key=_llm_api_key,
    model=_llm_model,
)
|
| 32 |
|
| 33 |
file_processor = FileProcessor(settings.supported_extensions, settings.max_file_size)
|
config.py
CHANGED
|
@@ -6,6 +6,7 @@ class LLMProvider(str, Enum):
|
|
| 6 |
OLLAMA = "ollama"
|
| 7 |
GROQ = "groq"
|
| 8 |
HUGGINGFACE = "huggingface"
|
|
|
|
| 9 |
|
| 10 |
class EmbeddingProvider(str, Enum):
|
| 11 |
SENTENCE_TRANSFORMERS = "sentence_transformers"
|
|
@@ -13,11 +14,11 @@ class EmbeddingProvider(str, Enum):
|
|
| 13 |
|
| 14 |
class Settings(BaseSettings):
|
| 15 |
# LLM Configuration
|
| 16 |
-
llm_provider: LLMProvider = LLMProvider.
|
| 17 |
ollama_base_url: str = "http://localhost:11434"
|
| 18 |
ollama_model: str = "llama2"
|
| 19 |
-
groq_api_key: str = ""
|
| 20 |
-
groq_model: str = "
|
| 21 |
huggingface_api_key: str = os.getenv("HUGGINGFACE_API_KEY", "")
|
| 22 |
huggingface_model: str = "microsoft/DialoGPT-medium"
|
| 23 |
|
|
|
|
| 6 |
OLLAMA = "ollama"
|
| 7 |
GROQ = "groq"
|
| 8 |
HUGGINGFACE = "huggingface"
|
| 9 |
+
SIMPLE = "simple"
|
| 10 |
|
| 11 |
class EmbeddingProvider(str, Enum):
|
| 12 |
SENTENCE_TRANSFORMERS = "sentence_transformers"
|
|
|
|
| 14 |
|
| 15 |
class Settings(BaseSettings):
|
| 16 |
# LLM Configuration
|
| 17 |
+
llm_provider: LLMProvider = LLMProvider.GROQ
|
| 18 |
ollama_base_url: str = "http://localhost:11434"
|
| 19 |
ollama_model: str = "llama2"
|
| 20 |
+
# Never ship a real key as the fallback default: the value comes from the
# GROQ_API_KEY environment variable and is empty when that is unset.
# NOTE(review): the key previously committed here must be revoked.
groq_api_key: str = os.getenv("GROQ_API_KEY", "")
|
| 21 |
+
groq_model: str = "deepseek-r1-distill-llama-70b"
|
| 22 |
huggingface_api_key: str = os.getenv("HUGGINGFACE_API_KEY", "")
|
| 23 |
huggingface_model: str = "microsoft/DialoGPT-medium"
|
| 24 |
|
requirements.txt
CHANGED
|
@@ -10,7 +10,7 @@ python-dotenv==1.0.0
|
|
| 10 |
aiofiles==23.2.1
|
| 11 |
sentence-transformers==2.2.2
|
| 12 |
transformers==4.36.0
|
| 13 |
-
torch=
|
| 14 |
huggingface-hub==0.19.4
|
| 15 |
requests==2.31.0
|
| 16 |
numpy==1.24.3
|
|
|
|
| 10 |
aiofiles==23.2.1
|
| 11 |
sentence-transformers==2.2.2
|
| 12 |
transformers==4.36.0
|
| 13 |
+
torch>=2.2.0
|
| 14 |
huggingface-hub==0.19.4
|
| 15 |
requests==2.31.0
|
| 16 |
numpy==1.24.3
|
services/chat_service.py
CHANGED
|
@@ -5,6 +5,7 @@ from typing import Dict, List
|
|
| 5 |
import json
|
| 6 |
import requests
|
| 7 |
import os
|
|
|
|
| 8 |
|
| 9 |
# For Groq (free tier available)
|
| 10 |
class GroqLLM:
|
|
@@ -37,26 +38,85 @@ class GroqLLM:
|
|
| 37 |
else:
|
| 38 |
raise Exception(f"Groq API error: {response.text}")
|
| 39 |
|
| 40 |
-
# For Hugging Face Inference API
|
| 41 |
class HuggingFaceLLM:
|
| 42 |
-
def __init__(self, api_key: str, model: str = "microsoft/DialoGPT-medium"):
|
| 43 |
self.api_key = api_key
|
| 44 |
self.model = model
|
| 45 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
def __call__(self, prompt: str) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
headers = {"Authorization": f"Bearer {self.api_key}"}
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
response = requests.post(self.base_url, headers=headers, json=data)
|
| 52 |
|
| 53 |
if response.status_code == 200:
|
| 54 |
result = response.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
if isinstance(result, list) and len(result) > 0:
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
return str(result)
|
| 58 |
else:
|
| 59 |
-
|
|
|
|
|
|
|
| 60 |
|
| 61 |
class FreeChatService:
|
| 62 |
def __init__(self, llm_provider: str, **kwargs):
|
|
@@ -74,10 +134,16 @@ class FreeChatService:
|
|
| 74 |
model=kwargs.get("model", "mixtral-8x7b-32768")
|
| 75 |
)
|
| 76 |
elif llm_provider == "huggingface":
|
|
|
|
|
|
|
|
|
|
| 77 |
self.llm = HuggingFaceLLM(
|
| 78 |
-
api_key=
|
| 79 |
model=kwargs.get("model", "microsoft/DialoGPT-medium")
|
| 80 |
)
|
|
|
|
|
|
|
|
|
|
| 81 |
else:
|
| 82 |
raise ValueError(f"Unsupported LLM provider: {llm_provider}")
|
| 83 |
|
|
|
|
| 5 |
import json
|
| 6 |
import requests
|
| 7 |
import os
|
| 8 |
+
from .simple_llm import NoAPILLM
|
| 9 |
|
| 10 |
# For Groq (free tier available)
|
| 11 |
class GroqLLM:
|
|
|
|
| 38 |
else:
|
| 39 |
raise Exception(f"Groq API error: {response.text}")
|
| 40 |
|
| 41 |
+
# For Hugging Face Inference API (with fallback to local transformers)
|
| 42 |
class HuggingFaceLLM:
    """Callable text generator backed by Hugging Face.

    With a usable API key the hosted Inference API is called over HTTP.
    Without one (empty, ``None`` or the ``.env.example`` placeholder) a local
    ``transformers`` text-generation pipeline is loaded instead.

    Args:
        api_key: Hugging Face API token; falsy or placeholder selects local mode.
        model: Model id used both for the API URL and for the local pipeline.

    Raises:
        Exception: In local mode, when ``transformers`` is missing or the
            model cannot be loaded.
    """

    _PLACEHOLDER_KEY = "your_huggingface_token_here"
    # Seconds to wait for the Inference API before giving up.
    _REQUEST_TIMEOUT = 60
    # Budget for newly generated tokens in local mode (prompt not included).
    _LOCAL_MAX_NEW_TOKENS = 100

    def __init__(self, api_key: str = "", model: str = "microsoft/DialoGPT-medium"):
        self.api_key = api_key
        self.model = model
        self.use_local = not api_key or api_key == self._PLACEHOLDER_KEY

        if not self.use_local:
            self.base_url = f"https://api-inference.huggingface.co/models/{model}"
            return

        try:
            from transformers import pipeline
        except ImportError as e:
            raise Exception(
                "transformers library not installed. Run: pip install transformers torch"
            ) from e

        try:
            print(f"Loading local model: {model}")
            # Load the model that was actually requested (and announced above).
            self.pipeline = pipeline("text-generation", model=model)
            print("✅ Local model loaded successfully!")
        except Exception as e:
            raise Exception(f"Failed to load local model: {str(e)}") from e

    def __call__(self, prompt: str) -> str:
        """Generate a completion for ``prompt``.

        Returns:
            The generated text. In local mode errors are reported in the
            returned string rather than raised.

        Raises:
            Exception: In API mode, when the service answers with a non-200
                status. Network errors from ``requests`` propagate unchanged.
        """
        if self.use_local:
            return self._generate_locally(prompt)
        return self._generate_via_api(prompt)

    def _generate_locally(self, prompt: str) -> str:
        """Run the local pipeline and strip the echoed prompt from its output."""
        try:
            # max_new_tokens bounds only the continuation; a max_length derived
            # from a word count under-estimates the prompt's token count.
            result = self.pipeline(
                prompt,
                max_new_tokens=self._LOCAL_MAX_NEW_TOKENS,
                num_return_sequences=1,
            )
            generated_text = result[0]["generated_text"]
            if generated_text.startswith(prompt):
                generated_text = generated_text[len(prompt):].strip()
            return generated_text if generated_text else "I understand your question about the code repository."
        except Exception as e:
            # Best effort: surface the failure as the answer text.
            return f"Local model error: {str(e)}"

    def _generate_via_api(self, prompt: str) -> str:
        """Call the hosted Inference API and extract the generated text."""
        headers = {"Authorization": f"Bearer {self.api_key}"}
        is_dialogpt = "DialoGPT" in self.model

        if is_dialogpt:
            # Conversational payload for DialoGPT models.
            data = {
                "inputs": {
                    "past_user_inputs": [],
                    "generated_responses": [],
                    "text": prompt
                }
            }
        else:
            # Standard text generation payload.
            data = {
                "inputs": prompt,
                "parameters": {
                    "max_length": 500,
                    "temperature": 0.7,
                    "do_sample": True
                }
            }

        response = requests.post(
            self.base_url, headers=headers, json=data, timeout=self._REQUEST_TIMEOUT
        )

        if response.status_code != 200:
            error_msg = f"Status: {response.status_code}, Response: {response.text}"
            raise Exception(f"HuggingFace API error: {error_msg}")

        result = response.json()

        if is_dialogpt and isinstance(result, dict):
            return result.get("generated_text", str(result))

        if isinstance(result, list) and len(result) > 0 and isinstance(result[0], dict):
            generated_text = result[0].get("generated_text", "")
            # The API echoes the prompt; keep only the continuation.
            if generated_text.startswith(prompt):
                generated_text = generated_text[len(prompt):].strip()
            return generated_text
        if isinstance(result, dict) and "generated_text" in result:
            return result["generated_text"]
        return str(result)
|
| 120 |
|
| 121 |
class FreeChatService:
|
| 122 |
def __init__(self, llm_provider: str, **kwargs):
|
|
|
|
| 134 |
model=kwargs.get("model", "mixtral-8x7b-32768")
|
| 135 |
)
|
| 136 |
elif llm_provider == "huggingface":
|
| 137 |
+
api_key = kwargs.get("api_key")
|
| 138 |
+
if not api_key or api_key == "your_huggingface_token_here":
|
| 139 |
+
print("⚠️ No valid HuggingFace API key found. Using local model fallback...")
|
| 140 |
self.llm = HuggingFaceLLM(
|
| 141 |
+
api_key=api_key,
|
| 142 |
model=kwargs.get("model", "microsoft/DialoGPT-medium")
|
| 143 |
)
|
| 144 |
+
elif llm_provider == "simple":
|
| 145 |
+
print("🚀 Using Simple LLM (no API key required)")
|
| 146 |
+
self.llm = NoAPILLM()
|
| 147 |
else:
|
| 148 |
raise ValueError(f"Unsupported LLM provider: {llm_provider}")
|
| 149 |
|
services/simple_llm.py
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Simple LLM service that works without API keys using basic text processing
|
| 3 |
+
"""
|
| 4 |
+
import re
|
| 5 |
+
from typing import List, Dict
|
| 6 |
+
|
| 7 |
+
class SimpleLLM:
    """Rule-based stand-in for an LLM that needs no API key.

    It inspects the retrieved code context with substring and regex
    heuristics and answers from fixed templates chosen by keywords found in
    the question.
    """

    # Shared by counting and name extraction so both always agree.
    _FUNCTION_RE = re.compile(r'\bdef\s+(\w+)|function\s+(\w+)|func\s+(\w+)')
    _CLASS_RE = re.compile(r'\bclass\s+(\w+)')
    _IMPORT_RE = re.compile(r'import\s+[\w.]+|from\s+[\w.]+\s+import|#include\s*<[\w.]+>')

    # Checked in order: specific topics first, the generic project summary
    # last, because words like "what" appear in almost every question.
    _TOPIC_ROUTES = (
        (('structure', 'organized', 'architecture'), '_describe_structure'),
        (('function', 'method', 'class'), '_describe_functions_classes'),
        (('dependency', 'dependencies', 'import', 'library'), '_describe_dependencies'),
        (('test', 'testing'), '_describe_tests'),
        (('error', 'exception', 'handling'), '_describe_error_handling'),
        (('what', 'about', 'project', 'repository'), '_describe_project'),
    )

    def __init__(self):
        # Substrings whose presence in the context hints at a language.
        self.code_keywords = {
            'python': ['def ', 'class ', 'import ', 'from ', '__init__', 'self.'],
            'javascript': ['function', 'const ', 'let ', 'var ', 'async', 'await'],
            'java': ['public class', 'private ', 'public ', 'static', 'void'],
            'cpp': ['#include', 'int main', 'class ', 'namespace'],
            'go': ['func ', 'package ', 'import', 'type'],
            'rust': ['fn ', 'struct ', 'impl ', 'use ', 'mod '],
        }

    def analyze_code_context(self, context: str) -> Dict:
        """Collect simple statistics about ``context``.

        Returns:
            A dict with ``languages`` (in ``code_keywords`` order),
            ``file_paths`` (at most 5 lines mentioning "path" and a slash),
            ``functions_count``, ``classes_count``, ``imports`` (at most 10
            matches) and ``total_lines``.
        """
        lines = context.split('\n')

        # A list built in dict order keeps the result deterministic,
        # unlike iterating a set of strings.
        languages = [
            lang
            for lang, keywords in self.code_keywords.items()
            if any(keyword in context for keyword in keywords)
        ]

        file_paths = [
            line.strip()
            for line in lines
            if 'path' in line.lower() and ('/' in line or '\\' in line)
        ]

        return {
            'languages': languages,
            'file_paths': file_paths[:5],  # Limit to 5 paths
            'functions_count': len(self._FUNCTION_RE.findall(context)),
            'classes_count': len(self._CLASS_RE.findall(context)),
            'imports': self._IMPORT_RE.findall(context)[:10],  # Limit to 10 imports
            'total_lines': len(lines)
        }

    def generate_response(self, question: str, context: str) -> str:
        """Answer ``question`` from ``context`` using the first matching topic.

        Topic keywords are matched as substrings of the lower-cased question;
        when none match, a general summary is returned.
        """
        question_lower = question.lower()
        analysis = self.analyze_code_context(context)

        for keywords, handler_name in self._TOPIC_ROUTES:
            if any(word in question_lower for word in keywords):
                return getattr(self, handler_name)(analysis, context)
        return self._general_response(analysis, context)

    def _describe_project(self, analysis: Dict, context: str) -> str:
        """Summarise languages, size and a few detected traits of the project."""
        languages = ", ".join(analysis['languages']) if analysis['languages'] else "multiple languages"
        sentences = [f"This project appears to be written in {languages}."]

        # Build the "It contains ..." sentence from whichever counts exist so
        # it is grammatical when only one of them is non-zero.
        contents = []
        if analysis['classes_count'] > 0:
            contents.append(f"{analysis['classes_count']} classes")
        if analysis['functions_count'] > 0:
            contents.append(f"{analysis['functions_count']} functions")
        if contents:
            sentences.append(f"It contains {' and '.join(contents)}.")

        context_lower = context.lower()
        if 'api' in context_lower or 'endpoint' in context_lower:
            sentences.append("It appears to be an API or web service.")
        if 'test' in context_lower:
            sentences.append("The project includes test files.")

        return " ".join(sentences)

    def _describe_structure(self, analysis: Dict, context: str) -> str:
        """List detected paths, languages and class/function counts."""
        pieces = ["The code is organized with the following structure:\n\n"]

        if analysis['file_paths']:
            pieces.append("**Key files/directories:**\n")
            pieces.extend(f"- {path}\n" for path in analysis['file_paths'])
            pieces.append("\n")

        if analysis['languages']:
            pieces.append(f"**Languages used:** {', '.join(analysis['languages'])}\n\n")

        if analysis['classes_count'] > 0:
            pieces.append(f"**Classes found:** {analysis['classes_count']}\n")
        if analysis['functions_count'] > 0:
            pieces.append(f"**Functions found:** {analysis['functions_count']}\n")

        return "".join(pieces)

    def _describe_functions_classes(self, analysis: Dict, context: str) -> str:
        """Report class/function counts and up to five names of each."""
        pieces = []

        if analysis['classes_count'] > 0:
            pieces.append(f"Found {analysis['classes_count']} classes in the codebase. ")
        if analysis['functions_count'] > 0:
            pieces.append(f"Found {analysis['functions_count']} functions/methods. ")

        class_names = self._CLASS_RE.findall(context)
        # Each match is a tuple with one non-empty group (def/function/func).
        function_names = [
            name for groups in self._FUNCTION_RE.findall(context) for name in groups if name
        ]

        if class_names:
            pieces.append(f"\n\n**Some classes:** {', '.join(class_names[:5])}")
        if function_names:
            pieces.append(f"\n\n**Some functions:** {', '.join(function_names[:5])}")

        response = "".join(pieces)
        return response if response else "No classes or functions clearly identified in the provided context."

    def _describe_dependencies(self, analysis: Dict, context: str) -> str:
        """List the import/include statements found by the analysis."""
        if not analysis['imports']:
            return "No clear dependencies or imports identified in the provided context."
        bullet_lines = "".join(f"- {imp}\n" for imp in analysis['imports'])
        return "**Dependencies and imports found:**\n\n" + bullet_lines

    def _describe_tests(self, analysis: Dict, context: str) -> str:
        """Say whether the word "test" occurs anywhere in the context."""
        if 'test' in context.lower():
            return "Test files appear to be present in this codebase. Look for files with 'test' in their names or directories."
        return "No obvious test files identified in the provided context."

    def _describe_error_handling(self, analysis: Dict, context: str) -> str:
        """Name the error-handling markers that occur in the context."""
        error_patterns = ['try:', 'except:', 'catch', 'throw', 'error', 'exception']
        context_lower = context.lower()  # lower-case once, not per pattern
        found_patterns = [pattern for pattern in error_patterns if pattern in context_lower]

        if found_patterns:
            return f"Error handling appears to be implemented using: {', '.join(found_patterns)}"
        return "No obvious error handling patterns identified in the provided context."

    def _general_response(self, analysis: Dict, context: str) -> str:
        """Fallback summary used when the question matches no topic."""
        pieces = ["Based on the code context provided:\n\n"]

        if analysis['languages']:
            pieces.append(f"- **Languages:** {', '.join(analysis['languages'])}\n")
        if analysis['total_lines'] > 0:
            pieces.append(f"- **Code size:** {analysis['total_lines']} lines analyzed\n")
        if analysis['functions_count'] > 0:
            pieces.append(f"- **Functions:** {analysis['functions_count']} found\n")
        if analysis['classes_count'] > 0:
            pieces.append(f"- **Classes:** {analysis['classes_count']} found\n")

        pieces.append("\nFor more specific information, please ask about particular aspects of the code.")
        return "".join(pieces)
|
| 181 |
+
|
| 182 |
+
class NoAPILLM:
    """Adapter giving SimpleLLM the same ``llm(prompt) -> str`` interface as the other LLMs."""

    def __init__(self):
        self.simple_llm = SimpleLLM()

    def __call__(self, prompt: str) -> str:
        """Split a "Context from repository: ... Question: ... Answer:" prompt and answer it.

        Returns:
            The SimpleLLM answer, or a generic help message when the prompt
            does not contain both markers.
        """
        if "Context from repository:" in prompt and "Question:" in prompt:
            # Split on the LAST "Question:" marker: the repository context is
            # arbitrary text and may itself contain that marker.
            context_raw, _, question_raw = prompt.rpartition("Question:")
            context_part = context_raw.replace("Context from repository:", "").strip()
            question_part = question_raw.replace("Answer:", "").strip()
            return self.simple_llm.generate_response(question_part, context_part)

        # Fallback for other prompt formats
        return "I can help analyze code repositories. Please provide specific questions about the codebase structure, functions, dependencies, or other aspects of the code."
|