HimanshuGoyal2004 committed on
Commit
5fcc6d7
·
1 Parent(s): 700f9a1

forgot to add main.py

Browse files
Files changed (1) hide show
  1. main.py +524 -0
main.py ADDED
@@ -0,0 +1,524 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Simplified Self-Documenting Code Analysis System
3
+
4
+ A streamlined LangGraph workflow with three nodes:
5
+ 1. Research - Understand code and check documentation
6
+ 2. Document - Add simple documentation
7
+ 3. Analyze - Run tests and capture issues/results
8
+ """
9
+
10
+ import os
11
+ import yaml
12
+ import getpass
13
+ import ast
14
+ import datetime
15
+ from typing import TypedDict, List, Dict, Any
16
+ from dotenv import load_dotenv
17
+
18
+ from langgraph.graph import StateGraph, END
19
+ from langgraph.prebuilt import create_react_agent
20
+ from langchain_google_genai import ChatGoogleGenerativeAI
21
+ from langchain_community.tools.tavily_search import TavilySearchResults
22
+ from langchain.tools import tool
23
+ from langchain_core.prompts import ChatPromptTemplate
24
+ from langchain_core.messages import HumanMessage
25
+ from langchain_experimental.tools import PythonREPLTool
26
+
27
# Read a local .env file (when one exists) into the process environment so
# that the API-key checks further down can find the keys in os.environ.
load_dotenv()
29
+
30
+ # Environment Setup
31
def setup_environment():
    """Ensure the API keys required by the agents are present in the environment.

    For each of ``GOOGLE_API_KEY`` and ``TAVILY_API_KEY`` an existing,
    non-blank value in ``os.environ`` is kept as is. Otherwise the user is
    prompted with ``getpass`` (input is not echoed) and the answer is stored
    in ``os.environ`` for the remainder of the process.
    """
    required_keys = (
        ("GOOGLE_API_KEY", "Enter your Google API Key: "),
        ("TAVILY_API_KEY", "Enter your Tavily API Key: "),
    )
    for env_name, prompt_text in required_keys:
        # A blank value (e.g. `GOOGLE_API_KEY=` left in a .env file) is as
        # unusable as a missing one, so both cases trigger the prompt.
        if not os.environ.get(env_name, "").strip():
            os.environ[env_name] = getpass.getpass(prompt_text)
37
+
38
+ # Simplified State Definition
39
class CodeState(TypedDict):
    """State dictionary handed from node to node in the workflow."""

    # Source code exactly as it was passed to the workflow.
    original_code: str
    # Output of the documentation step; starts as "" and stays empty when
    # that step is skipped.
    documented_code: str
    # Set by the research step: does the input already carry docs/comments?
    has_documentation: bool
    # Import names collected from the input by the research step.
    libraries_used: List[str]
    # Free-text results produced by the analysis step.
    test_results: List[str]
    # Issue lines extracted from the analysis step's response.
    issues_found: List[str]
    # Progress marker: "start", "researched", "documented", "analyzed",
    # "completed".
    current_step: str
48
+
49
+ # Tools Definition
50
@tool
def search_library_info(library_name: str) -> str:
    """Search for library documentation and usage examples"""
    # NOTE: the docstring above is what the agent sees as the tool
    # description, so it is intentionally left untouched.
    search_tool = TavilySearchResults(max_results=2)
    query = f"{library_name} python library documentation examples"
    results = search_tool.invoke(query)

    # The search tool may hand back an error message (a plain string) instead
    # of a list of hits; looping over that would call .get() on characters.
    if not isinstance(results, list):
        return f"Search failed: {results}"

    formatted_results = []
    for hit in results:
        if not isinstance(hit, dict):
            continue
        # `or` also covers a hit whose content is present but None/empty.
        content = str(hit.get('content') or 'No content')[:200]
        formatted_results.append(f"Source: {hit.get('url', 'N/A')}\nContent: {content}...")

    if not formatted_results:
        # Give the agent an explicit signal rather than an empty string.
        return f"No search results found for {library_name}"

    return "\n---\n".join(formatted_results)
63
+
64
@tool
def execute_code(code: str) -> str:
    """Execute Python code and return results"""
    # NOTE: the docstring above doubles as the tool description shown to the
    # agent, so it is kept verbatim.
    # SECURITY NOTE(review): `code` is written by the language model and is
    # handed straight to a Python REPL tool; no sandboxing is visible in this
    # file. Only run the workflow on inputs and machines you trust.
    repl = PythonREPLTool()
    try:
        output = repl.invoke(code)
    except Exception as e:
        return f"Execution failed:\n{str(e)}"
    return f"Execution successful:\n{output}"
73
+
74
+ # Prompts
75
# System prompt for the research agent: inspect existing documentation,
# list imported libraries and look up unfamiliar ones with the search tool.
RESEARCH_PROMPT = """
You are a Code Research Specialist. Analyze the provided Python code and:

1. Check if the code already has documentation (docstrings, comments)
2. Identify all imported libraries and understand their purpose
3. Understand what the code does and what kind of tests would be appropriate
4. Research any unfamiliar libraries using the search tool

Be thorough but concise in your analysis.
"""

# Instruction text for the documentation step: the model must answer with
# the documented code only.
DOCUMENT_PROMPT = """
You are a Documentation Generator. Add simple, clear documentation to the code:

1. Add docstrings to functions and classes (keep them concise)
2. Add brief comments for complex logic
3. Maintain original code functionality
4. Use simple, readable formatting

Return ONLY the documented code, no explanations.
"""

# System prompt for the analysis agent, which is given the code execution
# tool to run the code under test.
ANALYZE_PROMPT = """
You are a Code Analyzer and Tester. Your tasks:

1. Execute the code to test its functionality
2. Try different test scenarios and inputs
3. Identify any issues, errors, or potential problems
4. Document the input/output behavior

Use the code execution tool to run tests and capture results.
"""
107
+ # Initialize Model
108
def create_model():
    """Build the chat model used by the workflow nodes.

    Returns:
        A ``ChatGoogleGenerativeAI`` instance configured for
        ``gemini-2.5-flash`` at temperature 0.3.

    Raises:
        KeyError: if ``GOOGLE_API_KEY`` is missing from the environment.
    """
    model_settings = {
        "model": "gemini-2.5-flash",
        "temperature": 0.3,
        "google_api_key": os.environ["GOOGLE_API_KEY"],
    }
    return ChatGoogleGenerativeAI(**model_settings)
115
+
116
+ # Workflow Nodes
117
def _extract_libraries(code: str) -> List[str]:
    """Return the names imported by *code*; empty when it cannot be parsed."""
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError, RecursionError) as exc:
        # Best effort: unparsable input simply yields no libraries, but say
        # so instead of hiding the reason.
        print(f" - Could not parse code for imports: {exc}")
        return []

    libraries = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            libraries.extend(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            module = node.module or ""
            libraries.extend(f"{module}.{alias.name}" for alias in node.names)
    return libraries


def _has_documentation(code: str) -> bool:
    """Report whether *code* contains at least one docstring or ``#`` comment."""
    import io
    import tokenize

    documentable = (ast.Module, ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
    try:
        tree = ast.parse(code)
        if any(
            isinstance(node, documentable) and ast.get_docstring(node)
            for node in ast.walk(tree)
        ):
            return True
        # Real comment tokens only: a '#' inside a string literal is not one.
        tokens = tokenize.generate_tokens(io.StringIO(code).readline)
        return any(token.type == tokenize.COMMENT for token in tokens)
    except (SyntaxError, ValueError, RecursionError, tokenize.TokenError):
        # Code that cannot be parsed falls back to a plain text search.
        return '"""' in code or "'''" in code or '#' in code


def research_node(state: CodeState) -> CodeState:
    """
    Research node: Understand code and check documentation
    Uses agent with search tool for library research

    Args:
        state: Current workflow state; only ``original_code`` is read.

    Returns:
        A copy of ``state`` with ``libraries_used``, ``has_documentation``
        and ``current_step`` ("researched") filled in.
    """
    print("RESEARCH: Analyzing code structure and documentation...")

    source = state['original_code']

    model = create_model()
    research_agent = create_react_agent(
        model=model,
        tools=[search_library_info],
        prompt=ChatPromptTemplate.from_messages([
            ("system", RESEARCH_PROMPT),
            ("placeholder", "{messages}")
        ])
    )

    # The agent's reply is not stored because CodeState has no field for it;
    # the call itself is kept so the run behaves as before.
    research_agent.invoke({
        "messages": [HumanMessage(content=f"Analyze this Python code:\n\n{source}")]
    })

    libraries = _extract_libraries(source)
    has_docs = _has_documentation(source)

    print(f" - Libraries found: {libraries}")
    print(f" - Documentation present: {has_docs}")

    return {
        **state,
        "libraries_used": libraries,
        "has_documentation": has_docs,
        "current_step": "researched"
    }
171
+
172
def _strip_code_fence(text: str) -> str:
    """Return the body of the first Markdown code fence found in *text*.

    A fence opened with ```python is preferred over an untagged one. Text
    that contains no fence at all is returned unchanged.
    """
    if "```" not in text:
        return text

    marker = "```python" if "```python" in text else "```"
    fenced = text.split(marker, 1)[1].split("```", 1)[0]

    # Whatever is left on the fence line is a language tag ("py", or the "3"
    # of "python3"), not code; keeping it would corrupt the saved file.
    tag, newline, remainder = fenced.partition("\n")
    if newline and tag.strip().isalnum():
        fenced = remainder

    return fenced.strip()


def document_node(state: CodeState) -> CodeState:
    """
    Document node: Add simple documentation to code
    Uses model with documentation prompt (no tools needed)

    Args:
        state: Current workflow state; ``original_code`` and
            ``libraries_used`` are read.

    Returns:
        A copy of ``state`` with ``documented_code`` set to the model's
        answer (code fences removed) and ``current_step`` set to
        "documented".
    """
    print("DOCUMENT: Adding documentation and comments...")

    model = create_model()

    # Assemble the request line by line so the embedded source code is not
    # affected by the indentation of this function.
    request_text = "\n".join([
        "",
        f"{DOCUMENT_PROMPT}",
        "",
        "Code to document:",
        f"{state['original_code']}",
        "",
        f"Libraries used: {', '.join(state['libraries_used'])}",
        "",
        "Please add comprehensive documentation including:",
        "- Detailed docstrings for all functions and classes",
        "- Inline comments explaining complex logic",
        "- Comments for important variables and calculations",
        "- Warning comments for potential issues",
        "",
    ])

    result = model.invoke([HumanMessage(content=request_text)])
    documented_code = result.content

    # The model often wraps its answer in a Markdown fence; keep the code only.
    if isinstance(documented_code, str):
        documented_code = _strip_code_fence(documented_code)

    print(" - Documentation completed")

    return {
        **state,
        "documented_code": documented_code,
        "current_step": "documented"
    }
215
+
216
def _extract_issue_lines(text: str) -> List[str]:
    """Return the lines of *text* that mention a problem-related keyword.

    This is a plain keyword match, so a line such as "No errors found" is
    reported as well.
    """
    keywords = ('error', 'issue', 'problem', 'fail', 'exception', 'warning')
    issue_lines = []
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if len(line) <= 10:  # Avoid very short lines
            continue
        lowered = line.lower()
        if any(keyword in lowered for keyword in keywords):
            issue_lines.append(line)
    return issue_lines


def analyze_node(state: CodeState) -> CodeState:
    """
    Analyze node: Run tests and capture issues/results
    Uses model with code execution tool

    Args:
        state: Current workflow state; ``documented_code`` is analysed when
            it is non-empty, otherwise ``original_code``.

    Returns:
        A copy of ``state`` with ``test_results`` (the agent's final answer
        as a single entry), ``issues_found`` and ``current_step``
        ("analyzed") filled in.
    """
    print("ANALYZE: Testing code and identifying issues...")

    model = create_model()

    # Use the code to analyze (documented if available, otherwise original)
    code_to_analyze = state.get('documented_code') or state['original_code']

    # Create analyzer with code execution tool
    analyzer_agent = create_react_agent(
        model=model,
        tools=[execute_code],
        prompt=ChatPromptTemplate.from_messages([
            ("system", ANALYZE_PROMPT),
            ("placeholder", "{messages}")
        ])
    )

    # Assembled line by line so the embedded code keeps its own indentation.
    request_text = "\n".join([
        "",
        "Analyze and test this Python code:",
        "",
        f"{code_to_analyze}",
        "",
        "Execute the code and try different test scenarios. "
        "Document any issues and the input/output behavior.",
        "",
    ])

    result = analyzer_agent.invoke({"messages": [HumanMessage(content=request_text)]})
    response_text = result["messages"][-1].content

    if isinstance(response_text, str):
        # Use the full response as a single test result for better readability
        test_results = [response_text]
        issues = _extract_issue_lines(response_text)
    else:
        # Non-string content is stored in its string form and not scanned.
        test_results = [str(response_text)]
        issues = []

    if not issues:
        issues.append("No critical issues identified during analysis")

    print(f" - Issues found: {len(issues)}")
    print(f" - Test results captured: {len(test_results)}")

    return {
        **state,
        "test_results": test_results,
        "issues_found": issues,
        "current_step": "analyzed"
    }
287
+
288
+ # Conditional edge function
289
def should_skip_documentation(state: CodeState) -> str:
    """Decide whether to skip documentation based on existing docs

    Args:
        state: Current workflow state; only ``has_documentation`` is read.

    Returns:
        "analyze" when the code is already documented, otherwise "document".
    """
    # A routing function only selects the next node, so it leaves `state`
    # alone. The later nodes already fall back to `original_code` whenever
    # `documented_code` is empty.
    if state["has_documentation"]:
        print(" - Code already documented, proceeding to analysis")
        return "analyze"

    print(" - Code requires documentation")
    return "document"
299
+
300
+ # File saving functions
301
def save_documented_code(documented_code: str):
    """Write *documented_code* to ``code.py`` in the current working directory.

    Any failure is reported on stdout and not raised, so the workflow can
    continue.
    """
    # NOTE(review): a file called code.py can shadow the standard-library
    # `code` module for scripts started from the same directory.
    try:
        with open("code.py", "w", encoding="utf-8") as out_file:
            out_file.write(documented_code)
    except Exception as e:
        print(f" - Error saving code.py: {e}")
    else:
        print(" - Documented code saved to code.py")
309
+
310
def save_analysis_results(state: CodeState):
    """Save analysis results to analysis.txt

    The report is assembled in memory first and written in a single call, so
    a problem while building it (for example a missing state key) does not
    leave a truncated file behind.

    Args:
        state: Workflow state; ``libraries_used``, ``issues_found`` and
            ``test_results`` are read.

    Any failure is reported on stdout and not raised.
    """
    try:
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        report = [
            "# Code Analysis Results\n",
            f"Generated on: {timestamp}\n\n",
        ]

        report.append("## Libraries Used\n")
        if state['libraries_used']:
            report.extend(f"- {lib}\n" for lib in state['libraries_used'])
        else:
            report.append("- No libraries identified\n")
        report.append("\n")

        report.append("## Issues and Recommendations\n")
        if state['issues_found']:
            report.extend(
                f"{i}. {issue}\n" for i, issue in enumerate(state['issues_found'], 1)
            )
        else:
            report.append("- No critical issues identified\n")
        report.append("\n")

        report.append("## Test Results and I/O Behavior\n")
        if state['test_results']:
            report.extend(
                f"### Test {i}\n{result}\n\n"
                for i, result in enumerate(state['test_results'], 1)
            )
        else:
            report.append("- No test results captured\n")
        report.append("\n")

        report.extend([
            "## Usage Guidelines\n",
            "1. Review the documented code in code.py\n",
            "2. Address any issues or recommendations listed above\n",
            "3. Test the code with various input scenarios\n",
            "4. Validate functionality before production use\n",
        ])

        with open("analysis.txt", "w", encoding="utf-8") as f:
            f.writelines(report)

        print(" - Analysis results saved to analysis.txt")
    except Exception as e:
        print(f" - Error saving analysis.txt: {e}")
352
+
353
def final_node(state: CodeState) -> CodeState:
    """Final node: write the code and the analysis report to disk.

    Args:
        state: Workflow state after the analysis step.

    Returns:
        A copy of ``state`` with ``current_step`` set to "completed".
    """
    print("FINALIZE: Saving results to files...")

    # Prefer the documented version; an empty one means the documentation
    # step was skipped, so the original source is saved instead.
    documented = state.get('documented_code')
    save_documented_code(documented if documented else state['original_code'])

    save_analysis_results(state)

    print("Workflow completed successfully")

    finished_state = {**state}
    finished_state["current_step"] = "completed"
    return finished_state
372
+
373
+ # Workflow Creation
374
def create_workflow():
    """Build, wire and compile the research/document/analyze/final graph.

    Also tries to write a diagram of the graph to ``workflow_diagram.png``;
    a failure there is only reported, never raised.

    Returns:
        The compiled workflow.
    """
    graph = StateGraph(CodeState)

    node_handlers = (
        ("research", research_node),
        ("document", document_node),
        ("analyze", analyze_node),
        ("final", final_node),
    )
    for node_name, handler in node_handlers:
        graph.add_node(node_name, handler)

    graph.set_entry_point("research")

    # After research, either document first or jump straight to analysis.
    routes = {"document": "document", "analyze": "analyze"}
    graph.add_conditional_edges("research", should_skip_documentation, routes)

    for source_node, target_node in (
        ("document", "analyze"),
        ("analyze", "final"),
        ("final", END),
    ):
        graph.add_edge(source_node, target_node)

    compiled_workflow = graph.compile()

    # The diagram is a convenience only; the workflow is returned either way.
    try:
        diagram_bytes = compiled_workflow.get_graph().draw_mermaid_png()
        with open("workflow_diagram.png", "wb") as image_file:
            image_file.write(diagram_bytes)
        print("Workflow diagram saved as workflow_diagram.png")
    except Exception as e:
        print(f"Could not save workflow diagram: {e}")

    return compiled_workflow
417
+
418
def run_documentation_workflow(code_input: str) -> Dict:
    """
    Run the simplified documentation workflow

    Args:
        code_input: Python code to analyze and document

    Returns:
        Dictionary containing all results from the workflow
    """
    # Set up environment
    setup_environment()

    # Create and run workflow
    app = create_workflow()

    # Initialize state
    initial_state = {
        "original_code": code_input,
        "documented_code": "",
        "has_documentation": False,
        "libraries_used": [],
        "test_results": [],
        "issues_found": [],
        "current_step": "start"
    }

    print("Starting Documentation Workflow")
    print("=" * 50)

    # Start from the initial state and merge every node's output into it.
    # The summary below then works even if the stream yields nothing or a
    # node returns only part of the state.
    final_result = dict(initial_state)
    for step in app.stream(initial_state):
        for step_name, step_data in step.items():
            print(f"\nStep: {step_name.upper()}")
            print("-" * 30)

            if isinstance(step_data, dict):
                final_result.update(step_data)

    # Display final results
    print("\n" + "=" * 50)
    print("WORKFLOW SUMMARY")
    print("=" * 50)
    print(f"Status: {final_result['current_step']}")
    print(f"Libraries: {len(final_result['libraries_used'])}")
    print(f"Issues: {len(final_result['issues_found'])}")
    print(f"Tests: {len(final_result['test_results'])}")

    return final_result
469
+
470
+ # Example Usage
471
if __name__ == "__main__":
    # Demo input: a small undocumented module with deliberate weak spots
    # (unchecked kwargs, division without a zero guard) for the workflow to
    # document and analyse.
    sample_code = """
import math
import random

def calculate_area(shape, **kwargs):
    if shape == "circle":
        return math.pi * kwargs["radius"] ** 2
    elif shape == "rectangle":
        return kwargs["width"] * kwargs["height"]
    else:
        return 0

def divide_numbers(a, b):
    return a / b

def process_list(items):
    total = 0
    for i in range(len(items)):
        total += items[i] * 2
    return total

class Calculator:
    def __init__(self):
        self.history = []

    def add(self, a, b):
        result = a + b
        self.history.append(f"{a} + {b} = {result}")
        return result

    def divide(self, a, b):
        return divide_numbers(a, b)

calc = Calculator()
result = calc.add(5, 3)
area = calculate_area("circle", radius=5)
division = calc.divide(10, 2)
items = [1, 2, 3, 4]
processed = process_list(items)
print(f"Results: {result}, {area:.2f}, {division}, {processed}")
"""

    print("Running Simplified Documentation Workflow")
    print("=" * 50)

    # Run the full research -> document -> analyze -> save pipeline.
    result = run_documentation_workflow(sample_code)

    print("\nWorkflow completed successfully!")
    print("Output files:")
    for output_line in (
        " - code.py: Documented code",
        " - analysis.txt: Analysis results",
    ):
        print(output_line)