# NOTE(review): removed Hugging Face Spaces page-scrape artifacts that were
# interleaved here (status lines, "File size" footer, commit hash, and a
# line-number gutter) — they were not part of the Python source and made the
# file syntactically invalid.
import ast
import logging
import os
import re
from typing import Any, Dict, List

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

from .file_scanner import quick_scan_project
class AIReadmeGenerator:
    """Generate a README.md for a project by feeding a structural analysis
    of the project's files (contents, docstrings, requirements) to a
    Groq-hosted LLM via LangChain.
    """

    # Cap on characters read from any single project file, to keep memory
    # use and the LLM prompt size bounded.
    MAX_FILE_CHARS = 10000

    def __init__(self, api_key: str):
        """
        Args:
            api_key (str): Groq API key used to authenticate the chat model.
        """
        self.llm = ChatGroq(
            api_key=api_key,
            model_name="llama3-70b-8192"
        )

    def read_file_content(self, file_path: str) -> str:
        """
        Read the content of a file, truncated to MAX_FILE_CHARS characters.

        Args:
            file_path (str): Path to the file to read

        Returns:
            str: File content (possibly truncated), or an inline error
            message if the file cannot be read.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                # Truncate so one huge file cannot overwhelm memory or the
                # prompt later assembled from these contents.
                return file.read(self.MAX_FILE_CHARS)
        except Exception as e:
            # Best-effort: unreadable files are reported inline rather than
            # aborting the whole project analysis.
            return f"Error reading file {file_path}: {str(e)}"

    def analyze_project_structure(self, project_path: str) -> Dict[str, Any]:
        """
        Perform a comprehensive analysis of the project.

        Args:
            project_path (str): Path to the project directory

        Returns:
            Dict: Keys 'project_structure' (quick_scan_project output),
            'file_contents' (file -> truncated text), 'code_insights'
            (docstring summaries of well-known entry files), and
            'requirements' (list of requirement lines).
        """
        # Use the existing scanner for the structural overview.
        project_info = quick_scan_project(project_path)

        # Collect (truncated) contents of every file the scanner reported.
        file_contents = {}
        for file in project_info.get('files', []):
            full_path = os.path.join(project_path, file)
            if os.path.isfile(full_path):
                file_contents[file] = self.read_file_content(full_path)

        # Extract docstrings from conventional entry-point files, if present.
        code_insights = {}
        main_files = ['app.py', 'main.py', 'src/ui.py', 'src/generator.py']
        for main_file in main_files:
            full_path = os.path.join(project_path, main_file)
            if os.path.exists(full_path):
                code_insights[main_file] = self.extract_code_insights(full_path)

        return {
            "project_structure": project_info,
            "file_contents": file_contents,
            "code_insights": code_insights,
            "requirements": self.read_requirements(project_path)
        }

    def extract_code_insights(self, file_path: str) -> Dict[str, Any]:
        """
        Extract docstring-based insights from a Python source file.

        Parses the file with the `ast` module instead of regular
        expressions, so single-quoted docstrings, async functions, and
        docstrings containing quotes are all handled correctly.

        Args:
            file_path (str): Path to the Python file

        Returns:
            Dict: 'module_docstring' (str), plus 'key_functions' and
            'key_classes' (lists of "name: docstring" strings for
            documented definitions). An 'error' key is added if the file
            cannot be read or parsed as Python.
        """
        insights: Dict[str, Any] = {
            "module_docstring": "",
            "key_functions": [],
            "key_classes": []
        }
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                tree = ast.parse(file.read())

            insights['module_docstring'] = (ast.get_docstring(tree) or "").strip()

            for node in ast.walk(tree):
                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    target = 'key_functions'
                elif isinstance(node, ast.ClassDef):
                    target = 'key_classes'
                else:
                    continue
                # Only documented definitions are "key" — the point is to
                # surface human-written descriptions for the LLM.
                doc = ast.get_docstring(node)
                if doc:
                    insights[target].append(f"{node.name}: {doc.strip()}")
        except Exception as e:
            # Includes SyntaxError for files that are not valid Python.
            insights['error'] = str(e)
        return insights

    def read_requirements(self, project_path: str) -> List[str]:
        """
        Read the project's requirements.txt.

        Args:
            project_path (str): Path to the project directory

        Returns:
            List[str]: Non-empty, non-comment requirement lines; an empty
            list if the file is absent or unreadable.
        """
        try:
            req_path = os.path.join(project_path, 'requirements.txt')
            if os.path.exists(req_path):
                with open(req_path, 'r') as f:
                    # Strip before the comment check so indented '#' lines
                    # are skipped too (previously only column-0 comments were).
                    stripped = (line.strip() for line in f)
                    return [line for line in stripped
                            if line and not line.startswith('#')]
            return []
        except Exception:
            # Missing/unreadable requirements are not fatal to generation.
            return []

    def generate_concise_readme(self, project_path: str) -> str:
        """
        Generate a README based on comprehensive project analysis.

        Args:
            project_path (str): Path to the project directory

        Returns:
            str: Generated README content, or a fallback README containing
            the error message if the LLM call fails.
        """
        project_analysis = self.analyze_project_structure(project_path)

        # Prompt template: the four analysis sections are interpolated by
        # LangChain, so the {placeholders} must match the context keys below.
        template = """
        Generate a comprehensive README.md based on the following project analysis:
        PROJECT STRUCTURE:
        {project_structure}
        FILE CONTENTS:
        {file_contents}
        CODE INSIGHTS:
        {code_insights}
        REQUIREMENTS:
        {requirements}
        Based on this information, create a professional, detailed README.md that:
        - Explains the project's purpose and functionality
        - Describes key features and components
        - Provides clear setup and usage instructions
        - Highlights technical details
        - Includes any relevant dependencies or prerequisites
        Ensure the README is informative, well-structured, and tailored to the specific project.
        """

        # Flatten the analysis into plain strings for the prompt.
        context = {
            "project_structure": str(project_analysis.get('project_structure', 'No structure information')),
            "file_contents": '\n'.join([f"{k}:\n{v}" for k, v in project_analysis.get('file_contents', {}).items()]),
            "code_insights": '\n'.join([f"{k}:\n{str(v)}" for k, v in project_analysis.get('code_insights', {}).items()]),
            "requirements": '\n'.join(project_analysis.get('requirements', []))
        }

        try:
            # prompt -> model -> plain-string output.
            prompt = ChatPromptTemplate.from_template(template)
            chain = prompt | self.llm | StrOutputParser()
            readme_content = chain.invoke(context)
            return readme_content.strip()
        except Exception as e:
            logging.error(f"README generation error: {str(e)}")
            # Degrade gracefully: return a stub README instead of raising.
            return f"# Project README\n\nUnable to generate README automatically.\n\nError: {str(e)}"