aghilsabu committed on
Commit
2e0dde8
·
1 Parent(s): 960f6e1

feat: add AI-powered code analysis with Gemini

Browse files
Files changed (1) hide show
  1. src/core/analyzer.py +292 -0
src/core/analyzer.py ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Code Analyzer Module

Uses LlamaIndex + AI models for intelligent code analysis.
"""

import logging
from dataclasses import dataclass
from typing import Optional, List, Dict

from ..config import get_config

logger = logging.getLogger("codeatlas.analyzer")

# LlamaIndex imports.
# Each optional backend is probed independently; the *_AVAILABLE flags let
# CodeAnalyzer degrade gracefully (or raise a clear error) when a backend
# is not installed, instead of failing at import time.
try:
    from llama_index.core.llms import ChatMessage
    LLAMAINDEX_AVAILABLE = True
except ImportError:
    LLAMAINDEX_AVAILABLE = False
    logger.warning("LlamaIndex core not available")

try:
    from llama_index.llms.gemini import Gemini
    GEMINI_AVAILABLE = True
except ImportError:
    GEMINI_AVAILABLE = False
    logger.warning("LlamaIndex Gemini not available")

try:
    from llama_index.llms.openai import OpenAI
    OPENAI_AVAILABLE = True
except ImportError:
    OPENAI_AVAILABLE = False
    logger.warning("LlamaIndex OpenAI not available")

# Fallback to google-genai
# (used directly when the LlamaIndex Gemini wrapper is missing).
try:
    from google import genai
    from google.genai import types
    GENAI_AVAILABLE = True
except ImportError:
    GENAI_AVAILABLE = False
46
# System prompts for different analysis tasks

# Prompt for generate_diagram(): instructs the model to emit Graphviz DOT
# only, with hard node/edge budgets so the diagram stays readable.
ARCHITECT_PROMPT = """You are CodeAtlas, an expert software architect. Generate a Graphviz DOT diagram showing code architecture with RELATIONSHIPS.

CRITICAL CONSTRAINTS:
- Maximum 15-20 nodes (focus on KEY architectural components)
- Maximum 25-30 edges (most important relationships)
- Group related components into subgraphs
- Omit trivial files (tests, configs, utilities)

WHAT TO SHOW:
- Main entry points and core modules
- Key classes/services with clear responsibilities
- Important data flow and dependencies
- Layer boundaries (API, Business Logic, Data)

RULES:
1. Start with: digraph CodeArchitecture {
2. Every diagram MUST have arrows showing component connections
3. Use actual class/file names from the code
4. Group related items in clusters with descriptive labels
5. Use colors to distinguish layers/types

EXAMPLE:
```dot
digraph CodeArchitecture {
rankdir=TB;
node [shape=box, style="rounded,filled", fontname="Helvetica"];

subgraph cluster_api {
label="API Layer";
style="rounded,filled";
fillcolor="#e8f5e9";
Routes; Handlers;
}

subgraph cluster_services {
label="Business Logic";
style="rounded,filled";
fillcolor="#e3f2fd";
UserService; DataProcessor;
}

Routes -> Handlers;
Handlers -> UserService;
Handlers -> DataProcessor;
}
```

Generate ONLY valid DOT code. Focus on architectural clarity."""

# Prompt for generate_summary(): short structured markdown overview.
SUMMARY_PROMPT = """You are CodeAtlas. Analyze the codebase and provide a concise summary.

Include:
1. **Project Overview**: What does this codebase do?
2. **Technology Stack**: Languages, frameworks, key dependencies
3. **Architecture Pattern**: MVC, microservices, monolith, etc.
4. **Key Components**: Main modules and their responsibilities
5. **Entry Points**: Where does execution start?

Keep it concise (200-300 words). Be specific about actual file/class names."""

# Prompt for chat(): free-form Q&A grounded in the supplied code context.
CHAT_PROMPT = """You are CodeAtlas, an expert software architect assistant.

You're analyzing a codebase and helping answer questions about its architecture.
Use the provided code context to give accurate, specific answers.
Reference actual file names, class names, and code patterns when relevant.

Be helpful, concise, and technical. If you're unsure about something, say so."""
114
+
115
+
116
@dataclass
class AnalysisResult:
    """Result of code analysis."""
    # Generated text (DOT source, summary markdown, or chat reply);
    # empty string when the call failed.
    content: str
    # False when generation failed; see `error` for the reason.
    success: bool = True
    # Human-readable failure description, or None on success.
    error: Optional[str] = None
122
+
123
+
124
class CodeAnalyzer:
    """Analyzes code using LlamaIndex and AI models.

    Prefers a LlamaIndex backend (Gemini or OpenAI, whichever matches the
    configured model). When only the raw google-genai client is installed,
    falls back to calling it directly.
    """

    # Sentinel for the lazily-created LLM. Plain None cannot be used as the
    # "not yet created" marker because None is also a meaningful cached value
    # ("use the google-genai fallback"); caching None would make the `llm`
    # property re-run _create_llm() on every access in fallback mode.
    _LLM_UNSET = object()

    def __init__(self, api_key: Optional[str] = None, model_name: Optional[str] = None):
        """Initialize the analyzer.

        Args:
            api_key: Optional API key; defaults to the configured Gemini key.
            model_name: Optional model id; defaults to the configured model.
        """
        self.config = get_config()
        self.api_key = api_key or self.config.gemini_api_key
        self.model_name = model_name or self.config.models.get_model_id(self.config.current_model)
        self._llm = self._LLM_UNSET

    @property
    def llm(self):
        """Get or create the LLM instance (None means: use the genai fallback)."""
        if self._llm is self._LLM_UNSET:
            self._llm = self._create_llm()
        return self._llm

    def _create_llm(self):
        """Create the LLM backend appropriate for self.model_name.

        Returns:
            A LlamaIndex LLM instance, or None to signal that the raw
            google-genai client should be used instead.

        Raises:
            ValueError: If no suitable AI backend is installed.
        """
        is_openai = self.config.models.is_openai_model(self.model_name)

        if is_openai:
            if not OPENAI_AVAILABLE:
                raise ValueError("OpenAI support not available. Install llama-index-llms-openai")
            api_key = self.config.openai_api_key or self.api_key
            return OpenAI(api_key=api_key, model=self.model_name, temperature=0.7, max_tokens=4096)

        if GEMINI_AVAILABLE:
            return Gemini(
                api_key=self.api_key,
                model=f"models/{self.model_name}",
                temperature=0.7,
                max_tokens=4096,
            )
        if GENAI_AVAILABLE:
            return None  # Will use fallback
        raise ValueError("No AI backend available")

    def _generate_with_llamaindex(self, system_prompt: str, user_prompt: str) -> str:
        """Generate content using LlamaIndex (or the genai fallback)."""
        if self.llm is None:
            return self._generate_with_genai(system_prompt, user_prompt)

        messages = [
            ChatMessage(role="system", content=system_prompt),
            ChatMessage(role="user", content=user_prompt),
        ]
        response = self.llm.chat(messages)
        # message.content may be None (empty/blocked response); normalize to ""
        # so callers can safely .strip() it.
        return response.message.content or ""

    def _generate_with_genai(self, system_prompt: str, user_prompt: str) -> str:
        """Generate content using google-genai directly (fallback).

        Raises:
            ValueError: If google-genai is not installed either.
        """
        if not GENAI_AVAILABLE:
            raise ValueError("No AI backend available")

        client = genai.Client(api_key=self.api_key)
        response = client.models.generate_content(
            model=self.model_name,
            contents=[user_prompt],
            config=types.GenerateContentConfig(
                system_instruction=system_prompt,
                temperature=0.7,
                max_output_tokens=4096,
            )
        )
        return response.text or ""

    @staticmethod
    def _extract_dot(content: str) -> str:
        """Extract DOT source from a model response.

        Strips an optional ```dot / ```graphviz code fence; otherwise
        returns the trimmed response unchanged.
        """
        import re
        match = re.search(r"```(?:dot|graphviz)?\s*(.*?)\s*```", content, re.DOTALL)
        return match.group(1).strip() if match else content.strip()

    @staticmethod
    def _friendly_error(e: Exception) -> str:
        """Map common API failure modes to user-facing error messages."""
        error_str = str(e)
        if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str:
            return "Rate limited. Please wait and try again."
        if "401" in error_str or "403" in error_str:
            return "Invalid API key."
        return error_str

    def generate_diagram(self, code_context: str) -> "AnalysisResult":
        """Generate an architecture diagram from code context.

        Args:
            code_context: Formatted code content

        Returns:
            AnalysisResult with DOT diagram or error
        """
        user_prompt = f"""Analyze this codebase and generate an architecture diagram:

{code_context}

Generate a Graphviz DOT diagram showing the main components and their relationships."""

        try:
            logger.info(f"Generating diagram with {self.model_name}")
            content = self._generate_with_llamaindex(ARCHITECT_PROMPT, user_prompt)

            if not content.strip():
                return AnalysisResult(content="", success=False, error="Empty response from AI")

            dot_content = self._extract_dot(content)

            # Validate DOT code. "digraph" contains "graph", so one substring
            # test covers both directed and undirected graphs.
            if "graph" not in dot_content:
                return AnalysisResult(
                    content="",
                    success=False,
                    error=f"Invalid DOT code: {dot_content[:200]}"
                )

            return AnalysisResult(content=dot_content)

        except Exception as e:
            logger.exception("Diagram generation failed")
            return AnalysisResult(content="", success=False, error=self._friendly_error(e))

    def generate_summary(self, code_context: str) -> "AnalysisResult":
        """Generate a summary of the codebase.

        Args:
            code_context: Formatted code content

        Returns:
            AnalysisResult with summary or error
        """
        user_prompt = f"""Analyze this codebase:

{code_context}

Provide a concise summary."""

        try:
            logger.info(f"Generating summary with {self.model_name}")
            content = self._generate_with_llamaindex(SUMMARY_PROMPT, user_prompt)
            return AnalysisResult(content=content.strip())
        except Exception as e:
            logger.exception("Summary generation failed")
            # Use the same friendly error mapping as generate_diagram so
            # rate-limit / auth failures read consistently across features.
            return AnalysisResult(content="", success=False, error=self._friendly_error(e))

    def chat(self, message: str, code_context: str, history: Optional[List[Dict]] = None) -> "AnalysisResult":
        """Chat about the codebase.

        Args:
            message: User's question
            code_context: Formatted code content (or DOT diagram)
            history: Previous chat messages

        Returns:
            AnalysisResult with response or error
        """
        # Build context from history
        history_text = ""
        if history:
            for msg in history[-6:]:  # Last 3 exchanges
                if isinstance(msg, dict):
                    role = "User" if msg.get("role") == "user" else "Assistant"
                    content = msg.get("content", "")
                    if content:
                        history_text += f"{role}: {content}\n"

        user_prompt = f"""Code context:
{code_context}

{history_text}
Current question: {message}"""

        try:
            content = self._generate_with_llamaindex(CHAT_PROMPT, user_prompt)
            return AnalysisResult(content=content.strip())
        except Exception as e:
            logger.exception("Chat failed")
            # Consistent friendly error mapping (rate limit / auth) as above.
            return AnalysisResult(content="", success=False, error=self._friendly_error(e))