The0eau committed on
Commit
43f6bd8
·
1 Parent(s): d10890a
Files changed (4) hide show
  1. app.py +293 -112
  2. ask_agent.py +103 -37
  3. doc_generator.py +135 -80
  4. index.md +10 -6
app.py CHANGED
@@ -1,144 +1,325 @@
1
  import gradio as gr
2
  import os
3
  import shutil
 
4
  import tempfile
5
  import zipfile
6
- import subprocess
7
  import uuid
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- from ask_agent import ask_agent
10
- from doc_generator import generate_documented_code, generate_requirements_txt
11
- from readme_generator import generate_readme_from_zip
12
 
13
- last_processed_repo_path = ""
14
 
15
- def process_repo(repo_path, zip_output_name="AutoDocs"):
16
  with tempfile.TemporaryDirectory() as temp_output_dir:
17
- # Document .py files
18
- for root, _, files in os.walk(repo_path):
 
 
19
  for file in files:
20
  if file.endswith(".py"):
21
  file_path = os.path.join(root, file)
22
- generate_documented_code(file_path, file_path)
23
-
24
- # requirements.txt
25
- requirements_path = os.path.join(repo_path, "requirements.txt")
26
- generate_requirements_txt(repo_path, requirements_path)
27
-
28
- # Create a temporary .zip for README/index
29
- with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp_zip:
30
- zip_path = tmp_zip.name
31
- with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
32
- for root, _, files in os.walk(repo_path):
33
- for file in files:
34
- full_path = os.path.join(root, file)
35
- rel_path = os.path.relpath(full_path, repo_path)
36
- zipf.write(full_path, rel_path)
37
-
38
- # README + index.md
39
- readme_path, index_path = generate_readme_from_zip(zip_path, temp_output_dir)
40
-
41
- # Copy the processed repo
42
- for item in os.listdir(repo_path):
43
- s = os.path.join(repo_path, item)
44
- d = os.path.join(temp_output_dir, item)
45
- if os.path.isdir(s):
46
- shutil.copytree(s, d, dirs_exist_ok=True)
47
- else:
48
- shutil.copy2(s, d)
49
-
50
- dest_readme = os.path.join(temp_output_dir, "README.md")
51
- dest_index = os.path.join(temp_output_dir, "index.md")
52
-
53
- if os.path.abspath(readme_path) != os.path.abspath(dest_readme):
54
- shutil.copy2(readme_path, dest_readme)
55
- if os.path.abspath(index_path) != os.path.abspath(dest_index):
56
- shutil.copy2(index_path, dest_index)
57
-
58
- # Output zip file with consistent name
59
- output_zip_path = os.path.join(
60
- tempfile.gettempdir(), f"{zip_output_name}.zip"
61
- )
 
62
  with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
63
- for root, _, files in os.walk(temp_output_dir):
64
- for file in files:
65
- full_path = os.path.join(root, file)
66
- arcname = os.path.relpath(full_path, temp_output_dir)
67
- zipf.write(full_path, arcname)
68
- global last_processed_repo_path
69
- last_processed_repo_path = output_zip_path
70
  return output_zip_path
71
 
72
- def process_zip_upload(uploaded_zip_file):
 
 
 
73
  zip_path = uploaded_zip_file.name
74
- zip_name = os.path.splitext(os.path.basename(zip_path))[0] # e.g., my_project.zip β†’ my_project
75
 
76
  with tempfile.TemporaryDirectory() as temp_input_dir:
77
- input_zip_path = os.path.join(temp_input_dir, "input_repo.zip")
78
- shutil.copy(zip_path, input_zip_path)
79
- with zipfile.ZipFile(input_zip_path, "r") as zip_ref:
80
- zip_ref.extractall(temp_input_dir)
81
-
82
- extracted_dirs = [d for d in os.listdir(temp_input_dir) if os.path.isdir(os.path.join(temp_input_dir, d))]
83
- repo_root = os.path.join(temp_input_dir, extracted_dirs[0]) if extracted_dirs else temp_input_dir
84
 
85
- return process_repo(repo_root, zip_name)
 
 
 
 
 
 
86
 
87
- def process_github_clone(github_url):
 
 
 
 
 
88
  with tempfile.TemporaryDirectory() as clone_dir:
89
  try:
90
- subprocess.check_call(["git", "clone", github_url, clone_dir])
91
- return process_repo(clone_dir)
 
92
  except subprocess.CalledProcessError:
93
- return "❌ Error cloning the GitHub repository. Please check the URL."
94
-
95
- # Wrapper for process_zip_upload that also returns the path for the state
96
- def process_zip_and_update_state(uploaded_zip_file):
97
- zip_path = process_zip_upload(uploaded_zip_file)
98
- return zip_path, zip_path # (output for gr.File, output for gr.State)
99
-
100
- # Wrapper for process_github_clone as well
101
- def process_git_and_update_state(github_url):
102
- zip_path = process_github_clone(github_url)
103
- return zip_path, zip_path
104
-
105
- # Gradio user interface
106
- with gr.Blocks() as demo:
107
- gr.Markdown("# πŸ€– AutoDocs – Smart Documentation Generator")
108
- last_processed_repo_path_state = gr.State(value="")
109
- with gr.Tab("πŸ“¦ Upload .zip"):
110
- zip_file_input = gr.File(label="Drop your repo .zip file here", file_types=['.zip'])
111
- generate_btn_zip = gr.Button("πŸ“„ Generate from ZIP")
112
- output_zip_zip = gr.File(label="⬇️ Download your documented repo")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
- with gr.Tab("🌐 GitHub URL"):
115
- github_url_input = gr.Text(label="Link to GitHub repository", placeholder="https://github.com/user/repo.git")
116
- generate_btn_git = gr.Button("πŸ“„ Generate from GitHub")
117
- output_zip_git = gr.File(label="⬇️ Download your documented repo")
118
-
119
- with gr.Tab("🧠 Ask the agent about the repo"):
120
- chatbot = gr.Chatbot()
121
- user_input = gr.Textbox(placeholder="Ask your question here...")
122
- send_btn = gr.Button("Send")
123
-
124
- send_btn.click(
125
- fn=ask_agent,
126
- inputs=[chatbot, user_input, last_processed_repo_path_state],
127
- outputs=[chatbot, user_input]
 
 
 
 
 
 
 
 
 
128
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
- generate_btn_zip.click(
131
- fn=process_zip_and_update_state,
132
- inputs=[zip_file_input],
133
- outputs=[output_zip_zip, last_processed_repo_path_state]
134
- )
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  generate_btn_git.click(
137
- fn=process_git_and_update_state,
138
- inputs=[github_url_input],
139
- outputs=[output_zip_git, last_processed_repo_path_state]
 
 
 
 
 
 
140
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  if __name__ == "__main__":
143
- demo.queue()
144
- demo.launch()
 
 
1
  import gradio as gr
2
  import os
3
  import shutil
4
+ import subprocess
5
  import tempfile
6
  import zipfile
7
+
8
  import uuid
9
+ from functools import partial
10
+
11
+ # Import get_llm, but other modules will call it with current provider state
12
+ from llm_interface import get_llm
13
+ from ask_agent import ask_agent # ask_agent will call get_llm()
14
+ from doc_generator import generate_documented_code, generate_requirements_txt # these too
15
+ from readme_generator import generate_readme_from_zip # and this
16
+
17
+ # Helper to get current LLM based on UI state
18
+ # This is NOT how get_llm should be used directly by the modules.
19
+ # Instead, the modules call get_llm() which now can take UI selected provider.
20
+ # The `current_llm_provider_state` and `hf_endpoint_state` will be passed to `get_llm()`
21
+ # from the functions that are directly invoked by Gradio events.
22
+
23
+ def process_repo(repo_path, zip_output_name="AutoDocs",
24
+ llm_provider_ui: str = None, hf_endpoint_ui: str = None,
25
+ google_api_key_ui: str = None, hf_api_key_ui: str = None): # Pass UI choices
26
+ """
27
+ Processes a repository. Now calls get_llm with UI selected provider.
28
+ """
29
+ # Note: generate_documented_code, etc., will call get_llm() internally.
30
+ # We need to ensure get_llm() can pick up these UI-set values.
31
+ # This requires a change in how get_llm() is called or how state is managed globally.
32
+ # For simplicity here, we're assuming the modules (doc_generator, etc.) will call
33
+ # get_llm() and it will use the latest state (which is tricky with just env vars).
34
+ # A better approach: pass the llm_instance to these functions.
35
+ # OR: Make get_llm() aware of Gradio state (not ideal).
36
+
37
+ # Let's make the processing functions accept the llm_provider and hf_endpoint
38
+ # and they will pass it to get_llm when they need an LLM instance.
39
+
40
+
41
+
42
+
43
+
44
+
45
+
46
 
 
 
 
47
 
 
48
 
 
49
  with tempfile.TemporaryDirectory() as temp_output_dir:
50
+ processed_repo_path = os.path.join(temp_output_dir, "processed_repo")
51
+ shutil.copytree(repo_path, processed_repo_path)
52
+
53
+ for root, _, files in os.walk(processed_repo_path):
54
  for file in files:
55
  if file.endswith(".py"):
56
  file_path = os.path.join(root, file)
57
+ # Pass provider info to functions that use LLM
58
+ generate_documented_code(file_path, file_path,
59
+ llm_provider=llm_provider_ui,
60
+ hf_endpoint=hf_endpoint_ui,
61
+ hf_api_key=hf_api_key_ui,
62
+ google_api_key=google_api_key_ui)
63
+
64
+
65
+ requirements_path = os.path.join(processed_repo_path, "requirements.txt")
66
+ generate_requirements_txt(processed_repo_path, requirements_path) # Does not use LLM
67
+
68
+ with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp_zip_for_readme:
69
+ zip_path_for_readme = tmp_zip_for_readme.name
70
+ with zipfile.ZipFile(zip_path_for_readme, "w", zipfile.ZIP_DEFLATED) as zipf:
71
+ for r, _, fs in os.walk(processed_repo_path):
72
+ for f_item in fs:
73
+ full_p = os.path.join(r, f_item)
74
+ arcname = os.path.relpath(full_p, processed_repo_path)
75
+ zipf.write(full_p, arcname)
76
+
77
+ with tempfile.TemporaryDirectory() as readme_out_dir:
78
+ # Pass provider info
79
+ readme_path, index_path = generate_readme_from_zip(
80
+ zip_path_for_readme, readme_out_dir,
81
+ llm_provider=llm_provider_ui,
82
+ hf_endpoint=hf_endpoint_ui,
83
+ hf_api_key=hf_api_key_ui,
84
+ google_api_key=google_api_key_ui
85
+ )
86
+ shutil.copy2(readme_path, os.path.join(processed_repo_path, "README.md"))
87
+ shutil.copy2(index_path, os.path.join(processed_repo_path, "index.md"))
88
+
89
+ os.remove(zip_path_for_readme)
90
+
91
+ output_zip_filename = f"{zip_output_name}_{uuid.uuid4().hex[:8]}.zip"
92
+ output_zip_path = os.path.join(tempfile.gettempdir(), output_zip_filename)
93
+
94
+
95
+
96
+
97
+
98
  with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
99
+ for r, _, fs in os.walk(processed_repo_path):
100
+ for f_item in fs:
101
+ full_p = os.path.join(r, f_item)
102
+ arcname = os.path.relpath(full_p, processed_repo_path)
103
+ zipf.write(full_p, arcname)
104
+
105
+
106
  return output_zip_path
107
 
108
+ # --- Core processing functions now accept LLM provider details ---
109
+ def actual_process_zip_upload(uploaded_zip_file, progress_tracker,
110
+ llm_provider_ui, hf_endpoint_ui, google_api_key_ui, hf_api_key_ui):
111
+ progress_tracker(0, desc="Starting upload processing...")
112
  zip_path = uploaded_zip_file.name
113
+ zip_name = os.path.splitext(os.path.basename(zip_path))[0]
114
 
115
  with tempfile.TemporaryDirectory() as temp_input_dir:
116
+ progress_tracker(0.1, desc="Unzipping repository...")
117
+ with zipfile.ZipFile(zip_path, "r") as zip_ref:
 
 
 
 
 
118
 
119
+ zip_ref.extractall(temp_input_dir)
120
+ extracted_items = os.listdir(temp_input_dir)
121
+ repo_root = temp_input_dir
122
+ if len(extracted_items) == 1 and os.path.isdir(os.path.join(temp_input_dir, extracted_items[0])):
123
+ repo_root = os.path.join(temp_input_dir, extracted_items[0])
124
+ progress_tracker(0.3, desc="Generating documentation...")
125
+ return process_repo(repo_root, zip_name, llm_provider_ui, hf_endpoint_ui, google_api_key_ui, hf_api_key_ui)
126
 
127
+ def actual_process_github_clone(github_url, progress_tracker,
128
+ llm_provider_ui, hf_endpoint_ui, google_api_key_ui, hf_api_key_ui):
129
+ if not github_url or not (github_url.startswith("https://") or github_url.startswith("http://")):
130
+ return "❌ Please provide a valid GitHub URL.", ""
131
+ progress_tracker(0, desc="Cloning GitHub repository...")
132
+ repo_name_from_url = github_url.split("/")[-1].replace(".git", "")
133
  with tempfile.TemporaryDirectory() as clone_dir:
134
  try:
135
+ subprocess.check_call(["git", "clone", "--depth", "1", github_url, clone_dir])
136
+ progress_tracker(0.3, desc="Generating documentation...")
137
+ return process_repo(clone_dir, repo_name_from_url, llm_provider_ui, hf_endpoint_ui, google_api_key_ui, hf_api_key_ui)
138
  except subprocess.CalledProcessError:
139
+ return "❌ Error cloning the GitHub repository. Please check the URL.", ""
140
+ except Exception as e:
141
+ return f"❌ An unexpected error occurred: {e}", ""
142
+
143
+ # --- Gradio UI event handler wrapper ---
144
+ def process_and_update_state_handler(
145
+ specific_processing_function, # e.g. actual_process_github_clone
146
+ data_input, # e.g. github_url
147
+ # LLM config from UI state:
148
+ llm_provider_state: str,
149
+ hf_endpoint_state: str,
150
+ google_api_key_state: str,
151
+ hf_api_key_state: str, # Added this
152
+ progress=gr.Progress(track_tqdm=True)
153
+ ):
154
+ # Validate required fields based on provider
155
+ if llm_provider_state == "GEMINI" and not google_api_key_state:
156
+ return "❌ Google API Key is required for Gemini. Please set it in the LLM Configuration tab or .env file.", None
157
+ if llm_provider_state == "HUGGINGFACE" and not hf_endpoint_state:
158
+ return "❌ Hugging Face Model Endpoint is required. Please set it in the LLM Configuration tab or .env file.", None
159
+
160
+ # Call the actual processing function with all necessary args
161
+ result = specific_processing_function(
162
+ data_input, progress,
163
+ llm_provider_state, hf_endpoint_state,
164
+ google_api_key_state, hf_api_key_state
165
+ )
166
+
167
+ if isinstance(result, tuple) and "❌" in result[0]: return result[0], None
168
+ elif isinstance(result, str) and "❌" in result: return result, None
169
+ elif isinstance(result, str) and os.path.exists(result): return result, result
170
+ else: return f"❌ Unexpected result from processing: {result}", None
171
+
172
+ # --- Gradio UI ---
173
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
174
+ gr.Markdown("# πŸ€– AutoDocs – Intelligent Documentation Generator")
175
+
176
+
177
+
178
+
179
+
180
 
181
+ # --- State objects for LLM config ---
182
+ # Initialize from .env or defaults
183
+ default_provider = os.getenv("LLM_PROVIDER", "GEMINI").upper()
184
+ default_hf_endpoint = os.getenv("HF_MODEL_ENDPOINT", "")
185
+ default_google_api_key = os.getenv("GOOGLE_API_KEY", "")
186
+ default_hf_api_key = os.getenv("HF_API_KEY", "")
187
+
188
+
189
+ current_llm_provider_state = gr.State(default_provider)
190
+ current_hf_endpoint_state = gr.State(default_hf_endpoint)
191
+ current_google_api_key_state = gr.State(default_google_api_key)
192
+ current_hf_api_key_state = gr.State(default_hf_api_key) # For user to input if needed for HF
193
+
194
+ last_processed_repo_path_state = gr.State(None)
195
+
196
+ # --- LLM Configuration Tab ---
197
+ with gr.Tab("βš™οΈ LLM Configuration"):
198
+ gr.Markdown("Configure your preferred Language Model provider. Settings here override `.env` file values for the current session.")
199
+
200
+ selected_provider_radio = gr.Radio(
201
+ ["GEMINI", "HUGGINGFACE"],
202
+ label="Select LLM Provider",
203
+ value=default_provider
204
  )
205
+
206
+ # Gemini specific inputs
207
+ with gr.Group(visible=(default_provider == "GEMINI")) as gemini_config_group:
208
+ gr.Markdown("### Gemini Configuration")
209
+ google_api_key_input = gr.Textbox(
210
+ label="Google API Key",
211
+ placeholder="Enter your Google API Key (starts with 'AIzaSy...')",
212
+ value=default_google_api_key,
213
+ type="password"
214
+ )
215
+
216
+ # Hugging Face specific inputs
217
+ with gr.Group(visible=(default_provider == "HUGGINGFACE")) as hf_config_group:
218
+ gr.Markdown("### Hugging Face Configuration")
219
+ hf_endpoint_input = gr.Textbox(
220
+ label="Hugging Face Model Endpoint URL",
221
+ placeholder="e.g., https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2",
222
+ value=default_hf_endpoint
223
+ )
224
+ hf_api_key_input = gr.Textbox( # Added HF API Key input
225
+ label="Hugging Face API Key (Optional)",
226
+ placeholder="Enter your Hugging Face API Key (starts with 'hf_') if needed",
227
+ value=default_hf_api_key,
228
+ type="password"
229
+ )
230
+
231
+ # Update visibility of config groups based on radio selection
232
+ def toggle_config_visibility(provider_choice):
233
+ is_gemini = provider_choice == "GEMINI"
234
+ is_hf = provider_choice == "HUGGINGFACE"
235
+ return {
236
+ gemini_config_group: gr.update(visible=is_gemini),
237
+ hf_config_group: gr.update(visible=is_hf),
238
+ # Update state variables
239
+ current_llm_provider_state: provider_choice
240
+ }
241
+
242
+ selected_provider_radio.change(
243
+ fn=toggle_config_visibility,
244
+ inputs=[selected_provider_radio],
245
+ outputs=[gemini_config_group, hf_config_group, current_llm_provider_state]
246
+ )
247
+
248
+ # Update state when text inputs change
249
+ hf_endpoint_input.change(lambda x: x, inputs=[hf_endpoint_input], outputs=[current_hf_endpoint_state])
250
+ google_api_key_input.change(lambda x: x, inputs=[google_api_key_input], outputs=[current_google_api_key_state])
251
+ hf_api_key_input.change(lambda x: x, inputs=[hf_api_key_input], outputs=[current_hf_api_key_state])
252
 
 
 
 
 
 
253
 
254
+ # --- Processing Tabs (GitHub, ZIP) ---
255
+ with gr.Tab("🌐 Process from GitHub URL"):
256
+ github_url_input = gr.Text(label="GitHub Repository URL", placeholder="https://github.com/gradio-app/gradio")
257
+ generate_btn_git = gr.Button("πŸ“„ Generate from GitHub", variant="primary")
258
+ output_zip_git = gr.File(label="⬇️ Download Your Documented Repo (.zip)")
259
+
260
+ with gr.Tab("πŸ“¦ Process from .zip upload"):
261
+ zip_file_input = gr.File(label="Upload a .zip file of your repository", file_types=['.zip'])
262
+ generate_btn_zip = gr.Button("πŸ“„ Generate from ZIP", variant="primary")
263
+ output_zip_zip = gr.File(label="⬇️ Download Your Documented Repo (.zip)")
264
+
265
+ # --- Chat Tab ---
266
+ with gr.Tab("🧠 Ask a Question about the Repo"):
267
+ with gr.Column():
268
+ gr.Markdown("Once you've processed a repository, you can ask questions about its content here. Uses the LLM configured in 'LLM Configuration' tab.")
269
+ chatbot = gr.Chatbot(label="Agent Chat", height=500)
270
+ user_input_tb = gr.Textbox(placeholder="e.g., 'What does the main function in app.py do?'", show_label=False, container=False)
271
+ send_btn = gr.Button("βœ‰οΈ Send")
272
+
273
+ # --- Click Handlers ---
274
+ # Now pass all relevant state variables to the handler
275
  generate_btn_git.click(
276
+ fn=partial(process_and_update_state_handler, actual_process_github_clone),
277
+ inputs=[
278
+ github_url_input,
279
+ current_llm_provider_state,
280
+ current_hf_endpoint_state,
281
+ current_google_api_key_state,
282
+ current_hf_api_key_state
283
+ ],
284
+ outputs=[output_zip_git, last_processed_repo_path_state],
285
  )
286
+
287
+ generate_btn_zip.click(
288
+ fn=partial(process_and_update_state_handler, actual_process_zip_upload),
289
+ inputs=[
290
+ zip_file_input,
291
+ current_llm_provider_state,
292
+ current_hf_endpoint_state,
293
+ current_google_api_key_state,
294
+ current_hf_api_key_state
295
+ ],
296
+ outputs=[output_zip_zip, last_processed_repo_path_state],
297
+ )
298
+
299
+ def handle_chat_submit(history, message, repo_path_state,
300
+ provider_state, hf_endpoint_s, google_api_key_s, hf_api_key_s): # Add provider states
301
+ if not message.strip(): return history, message
302
+
303
+ # Pass provider info to ask_agent
304
+ updated_history, _ = ask_agent(
305
+ history, message, repo_path_state,
306
+ llm_provider=provider_state,
307
+ hf_endpoint=hf_endpoint_s,
308
+ google_api_key=google_api_key_s,
309
+ hf_api_key=hf_api_key_s
310
+ )
311
+ return updated_history, ""
312
+
313
+ # Gather all necessary states for the chat handler
314
+ chat_inputs = [
315
+ chatbot, user_input_tb, last_processed_repo_path_state,
316
+ current_llm_provider_state, current_hf_endpoint_state,
317
+ current_google_api_key_state, current_hf_api_key_state
318
+ ]
319
+ user_input_tb.submit(fn=handle_chat_submit, inputs=chat_inputs, outputs=[chatbot, user_input_tb])
320
+ send_btn.click(fn=handle_chat_submit, inputs=chat_inputs, outputs=[chatbot, user_input_tb])
321
 
322
  if __name__ == "__main__":
323
+ from dotenv import load_dotenv
324
+ load_dotenv() # Load .env for defaults, UI can override
325
+ demo.queue().launch() # Removed share=True for local testing
ask_agent.py CHANGED
@@ -1,62 +1,128 @@
1
  import os
2
  import tempfile
3
  import zipfile
4
- import google.generativeai as genai
5
- from dotenv import load_dotenv
6
- load_dotenv()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- API_KEY = os.getenv("GOOGLE_API_KEY")
9
- genai.configure(api_key=API_KEY)
10
- model = genai.GenerativeModel("models/gemini-2.0-flash")
11
- chat_session = model.start_chat(history=[])
12
 
13
- def ask_agent(history, message, last_processed_repo_path):
14
 
15
  if not last_processed_repo_path or not os.path.exists(last_processed_repo_path):
16
- return history, "πŸ“‚ No repository has been processed yet. Please generate documentation first."
 
 
 
 
 
17
 
 
 
18
  with tempfile.TemporaryDirectory() as tmpdir:
19
- with zipfile.ZipFile(last_processed_repo_path, 'r') as zip_ref:
20
- zip_ref.extractall(tmpdir)
 
 
 
 
 
 
 
 
21
 
22
- # Extensions for docs and code to consider
23
  extensions_docs = [".md", ".txt"]
24
- extensions_code = [".py", ".js", ".java", ".ts", ".cpp", ".c", ".cs", ".go", ".rb", ".swift", ".php"]
25
 
26
  all_files = []
27
- for root, _, files in os.walk(tmpdir):
 
 
 
 
 
28
  for file in files:
29
- ext = os.path.splitext(file)[1].lower()
30
- if ext in extensions_docs or ext in extensions_code:
31
  all_files.append(os.path.join(root, file))
32
 
33
  if not all_files:
34
- return history, "πŸ“„ No documentation or code files found in the generated zip."
 
 
 
 
 
 
35
 
36
- # Read and concatenate content
37
- docs_and_code_content = ""
38
  for file_path in all_files:
 
 
 
39
  try:
40
- with open(file_path, "r", encoding="utf-8") as f:
41
- file_content = f.read()
42
- rel_path = os.path.relpath(file_path, tmpdir)
43
- docs_and_code_content += f"\n\n===== File: {rel_path} =====\n\n"
44
- docs_and_code_content += file_content
 
45
  except Exception as e:
46
- docs_and_code_content += f"\n\n===== Error reading file {file_path}: {str(e)} =====\n\n"
 
 
 
 
 
 
47
 
48
- prompt = (
49
- f"Here is the content of the project (documentation and code):\n\n{docs_and_code_content}\n\n"
50
- f"Question: {message}\n\nPlease respond clearly and precisely."
51
- )
52
 
53
- try:
54
- response = chat_session.send_message(prompt)
55
- answer = response.text
56
- except Exception as e:
57
- answer = f"❌ Error when calling Gemini: {str(e)}"
 
 
 
 
 
 
 
 
 
 
58
 
59
- history = history or []
60
- history.append((message, answer))
61
 
62
- return history, ""
 
1
  import os
2
  import tempfile
3
  import zipfile
4
+ from typing import List, Tuple # Retain for Gradio history if not switching to 'messages' type
5
+
6
+ from llm_interface import get_llm # Make sure this is imported
7
+
8
+ def ask_agent(gradio_history: List[Tuple[str, str]],
9
+ message: str,
10
+ last_processed_repo_path: str,
11
+ llm_provider: str = None,
12
+ hf_endpoint: str = None,
13
+ hf_api_key: str = None,
14
+ google_api_key: str = None):
15
+ """
16
+ Handles a user's question about a processed repository using a conversational LLM.
17
+
18
+ Args:
19
+ gradio_history: The chat history from the Gradio chatbot component.
20
+ message: The new message from the user.
21
+ last_processed_repo_path: Path to the zip file of the last processed repo.
22
+ llm_provider: The LLM provider chosen in the UI.
23
+ hf_endpoint: The Hugging Face endpoint URL, if chosen.
24
+ hf_api_key: The Hugging Face API key, if provided.
25
+ google_api_key: The Google API key, if chosen.
26
+
27
+ Returns:
28
+ A tuple containing the updated Gradio history and an empty string for the textbox.
29
+ """
30
+ # Get LLM instance with current provider settings from UI/env
31
+ llm = get_llm(provider=llm_provider,
32
+ hf_endpoint=hf_endpoint,
33
+ hf_api_key=hf_api_key,
34
+ google_api_key=google_api_key)
35
+
36
+ if not message or not message.strip():
37
+ gradio_history.append((message, "Please enter a question."))
38
+ return gradio_history, ""
39
 
 
 
 
 
40
 
 
41
 
42
  if not last_processed_repo_path or not os.path.exists(last_processed_repo_path):
43
+ gradio_history.append((message, "πŸ“‚ No repository has been processed yet. Please generate documentation for a repository first on the other tabs."))
44
+ return gradio_history, ""
45
+
46
+ if not zipfile.is_zipfile(last_processed_repo_path):
47
+ gradio_history.append((message, f"❌ The stored path '{last_processed_repo_path}' is not a valid .zip file. Please re-process a repository."))
48
+ return gradio_history, ""
49
 
50
+
51
+ docs_and_code_content = ""
52
  with tempfile.TemporaryDirectory() as tmpdir:
53
+ try:
54
+ with zipfile.ZipFile(last_processed_repo_path, 'r') as zip_ref:
55
+ zip_ref.extractall(tmpdir)
56
+ except zipfile.BadZipFile:
57
+ gradio_history.append((message, "❌ The processed repository file seems corrupted. Please re-process a repository."))
58
+ return gradio_history, ""
59
+ except Exception as e:
60
+ gradio_history.append((message, f"❌ Error extracting the repository: {e}. Please re-process."))
61
+ return gradio_history, ""
62
+
63
 
 
64
  extensions_docs = [".md", ".txt"]
65
+ extensions_code = [".py", ".js", ".java", ".ts", ".cpp", ".c", ".cs", ".go", ".rb", ".swift", ".php", ".yml", ".yaml", ".json", ".xml", ".html", ".css", ".sh"]
66
 
67
  all_files = []
68
+ extracted_items = os.listdir(tmpdir)
69
+ repo_scan_root = tmpdir
70
+ if len(extracted_items) == 1 and os.path.isdir(os.path.join(tmpdir, extracted_items[0])):
71
+ repo_scan_root = os.path.join(tmpdir, extracted_items[0])
72
+
73
+ for root, _, files in os.walk(repo_scan_root):
74
  for file in files:
75
+ _, ext = os.path.splitext(file)
76
+ if ext.lower() in extensions_docs or ext.lower() in extensions_code:
77
  all_files.append(os.path.join(root, file))
78
 
79
  if not all_files:
80
+ gradio_history.append((message, "πŸ“„ No relevant documentation or code files were found in the processed repository. It might be empty or contain unsupported file types."))
81
+ return gradio_history, ""
82
+
83
+ MAX_CONTENT_CHARS = 30000
84
+ current_chars = 0
85
+
86
+
87
 
 
 
88
  for file_path in all_files:
89
+ if current_chars >= MAX_CONTENT_CHARS:
90
+ docs_and_code_content += "\n\n===== [Content Truncated due to size limit] ====="
91
+ break
92
  try:
93
+ with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
94
+ file_content = f.read(MAX_CONTENT_CHARS - current_chars)
95
+ rel_path = os.path.relpath(file_path, repo_scan_root)
96
+ content_to_add = f"\n\n===== File: {rel_path} =====\n\n{file_content}"
97
+ docs_and_code_content += content_to_add
98
+ current_chars += len(content_to_add)
99
  except Exception as e:
100
+ error_msg = f"\n\n===== Error reading file {os.path.relpath(file_path, repo_scan_root)}: {e} =====\n\n"
101
+ docs_and_code_content += error_msg
102
+ current_chars += len(error_msg)
103
+
104
+ if not docs_and_code_content.strip():
105
+ gradio_history.append((message, "πŸ“„ Could not read content from any relevant files in the repository."))
106
+ return gradio_history, ""
107
 
 
 
 
 
108
 
109
+ current_turn_prompt = (
110
+ f"You are a helpful AI assistant, an expert in understanding code and project structures. "
111
+ f"Based ONLY on the following project content, answer the user's question. "
112
+ f"If the answer cannot be found in the provided content, say so. Do not invent information.\n\n"
113
+ f"--- Project Content ---\n{docs_and_code_content}\n--- End Project Content ---\n\n"
114
+ f"User Question: {message}\n\n"
115
+ f"Your Answer (be clear, concise, and stay strictly within the provided content):"
116
+ )
117
+
118
+ chat_session_obj = llm.start_chat_session(history=gradio_history)
119
+ # Check if starting the session itself failed (e.g., due to API key issues reported by get_llm/LLMInterface stubs)
120
+ if isinstance(chat_session_obj, str) and chat_session_obj.startswith("❌"):
121
+ # The error message from start_chat_session (or the stub) is the response
122
+ gradio_history.append((message, chat_session_obj))
123
+ return gradio_history, ""
124
 
125
+ answer = llm.send_chat_message(session=chat_session_obj, message=current_turn_prompt)
126
+ gradio_history.append((message, answer))
127
 
128
+ return gradio_history, ""
doc_generator.py CHANGED
@@ -1,141 +1,196 @@
1
- import google.generativeai as genai
2
- import re
3
- import os
4
  import ast
5
- from dotenv import load_dotenv
6
- import sys
7
  import importlib.util
 
 
 
 
 
 
 
 
8
 
9
- load_dotenv()
10
 
11
- API_KEY = os.getenv("GOOGLE_API_KEY")
12
- if API_KEY is None:
13
- raise ValueError("⚠️ The API key MY_API_KEY is missing! Check the Secrets in Hugging Face.")
14
- genai.configure(api_key=API_KEY)
15
- model = genai.GenerativeModel("models/gemini-2.0-flash")
16
 
17
  PROMPT = """You are an expert programming assistant.
18
- For the following code, perform the following actions:
19
- - The code must remain exactly the same
20
- - Add clear comments for each important step.
21
- - Rename variables if it makes the code easier to understand.
22
- - Add type annotations if the language supports it.
23
- - For each function, add a Google-style docstring (or equivalent format depending on the language).
24
-
25
- Respond only with the updated code, no explanation.
26
- Here is the code:
27
 
 
 
 
28
  {code}
29
  """
30
-
31
- def generate_documented_code(input_path: str, output_path: str) -> str:
 
32
  """
33
- Generate a documented version of the code from the given input file and save it to the output file.
34
-
35
  Args:
36
- input_path (str): Path to the original code file.
37
- output_path (str): Path where the documented code will be saved.
38
-
 
 
 
 
 
39
  Returns:
40
- str: The updated and documented code.
41
  """
42
- with open(input_path, "r", encoding="utf-8") as f:
43
- original_code = f.read()
 
 
 
44
 
45
- prompt = PROMPT.format(code=original_code)
46
- response = model.generate_content(prompt)
47
- updated_code = response.text.strip()
48
-
49
- # Clean up Markdown blocks if present
50
- lines = updated_code.splitlines()
51
- if len(lines) > 2:
52
- lines = lines[1:-1] # remove the first and last lines
53
- updated_code = "\n".join(lines)
54
- else:
55
- # if less than 3 lines, clear everything or keep as is depending on needs
56
- updated_code = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  with open(output_path, "w", encoding="utf-8") as output_file:
59
  output_file.write(updated_code)
60
 
61
  return updated_code
62
 
 
63
 
64
- def extract_imports_from_file(file_path):
65
- """
66
- Extract imported modules from a Python file to generate requirements.txt.
67
 
 
 
68
  Args:
69
- file_path (str): Path to the Python file.
70
 
71
  Returns:
72
- set: A set of imported module names.
73
  """
 
74
  try:
75
- with open(file_path, "r", encoding="utf-8") as f:
76
- tree = ast.parse(f.read())
77
- except SyntaxError:
78
- return set()
 
 
 
 
 
 
 
 
79
 
80
- imports = set()
81
  for node in ast.walk(tree):
82
  if isinstance(node, ast.Import):
83
  for alias in node.names:
84
- imports.add(alias.name.split('.')[0])
85
- elif isinstance(node, ast.ImportFrom):
86
- if node.module and not node.module.startswith("."):
87
  imports.add(node.module.split('.')[0])
88
  return imports
89
 
90
-
91
- def is_std_lib(module_name):
92
  """
93
- Check if a module is part of the Python standard library.
94
-
95
  Args:
96
- module_name (str): The name of the module.
97
 
98
  Returns:
99
- bool: True if the module is part of the standard library, False otherwise.
100
  """
 
 
101
  if module_name in sys.builtin_module_names:
102
  return True
103
- spec = importlib.util.find_spec(module_name)
104
- return spec is not None and "site-packages" not in (spec.origin or "")
105
-
106
-
107
- def generate_requirements_txt(base_path, output_path):
 
 
 
 
 
 
 
108
  """
109
- Generate a requirements.txt file based on external imports found in Python files.
 
 
110
 
111
  Args:
112
- base_path (str): Root directory of the codebase.
113
- output_path (str): Path to save the generated requirements.txt file.
114
  """
115
  all_imports = set()
116
  local_modules = set()
117
 
118
- # Get names of internal modules (i.e., .py files in the repo)
119
- for root, _, files in os.walk(base_path):
 
 
 
 
 
 
 
 
120
  for file in files:
121
  if file.endswith(".py"):
122
  module_name = os.path.splitext(file)[0]
123
  local_modules.add(module_name)
 
 
 
 
124
 
125
- # Extract all imports used in the project
126
- for root, _, files in os.walk(base_path):
127
  for file in files:
128
  if file.endswith(".py"):
129
  file_path = os.path.join(root, file)
130
  all_imports.update(extract_imports_from_file(file_path))
131
 
132
- # Remove internal modules and standard library modules
133
- external_imports = sorted([
134
  imp for imp in all_imports
135
- if imp not in local_modules and not is_std_lib(imp)
136
- ])
 
137
 
138
- # Write the requirements.txt file
139
  with open(output_path, "w", encoding="utf-8") as f:
140
- for package in external_imports:
141
- f.write(f"{package}\n")
 
 
 
 
 
 
 
1
  import ast
2
+
3
+
4
  import importlib.util
5
+ import os
6
+ import sys
7
+
8
+ from llm_interface import get_llm # Make sure this is imported
9
+
10
+
11
+
12
+
13
 
 
14
 
 
 
 
 
 
15
 
16
  PROMPT = """You are an expert programming assistant.
17
+ For the following Python code, perform the following actions:
18
+ - The code structure (classes, functions, arguments, order) must remain exactly the same.
19
+ - Add clear comments for each important step or complex logic.
20
+ - Add Google-style docstrings for all classes and functions. Docstrings should explain the purpose, arguments (with types if possible from context), and what the function/method returns (if anything, with type).
21
+ - Add type annotations for function arguments and return types. If a type is complex or unknown, use `typing.Any` or a descriptive placeholder if appropriate.
22
+
23
+ Respond ONLY with the updated Python code. Do not add any explanatory text before or after the code block.
24
+ Ensure the output is a single, valid Python code block.
 
25
 
26
+
27
+ Here is the code:
28
+ ```python
29
  {code}
30
  """
31
+ def generate_documented_code(input_path: str, output_path: str,
32
+ llm_provider: str = None, hf_endpoint: str = None,
33
+ hf_api_key: str = None, google_api_key: str = None) -> str:
34
  """
35
+ Generates documented code for a given file using the configured LLM.
 
36
  Args:
37
+ input_path: Path to the Python file to document.
38
+ output_path: Path where the documented Python file will be saved.
39
+ llm_provider: The LLM provider chosen in the UI (e.g., "GEMINI", "HUGGINGFACE").
40
+ hf_endpoint: The Hugging Face endpoint URL, if chosen.
41
+ hf_api_key: The Hugging Face API key, if provided.
42
+ google_api_key: The Google API key, if chosen.
43
+
44
+
45
  Returns:
46
+ The content of the documented code, or the original code/error message on failure.
47
  """
48
+ # Get LLM instance with current provider settings from UI/env
49
+ llm = get_llm(provider=llm_provider,
50
+ hf_endpoint=hf_endpoint,
51
+ hf_api_key=hf_api_key,
52
+ google_api_key=google_api_key)
53
 
54
+ try:
55
+ with open(input_path, "r", encoding="utf-8") as f:
56
+ original_code = f.read()
57
+ except Exception as e:
58
+ print(f"Error reading input file {input_path}: {e}")
59
+ error_content = f"# Error reading input file: {e}\n"
60
+ # Still write to output_path so the file exists for zipping, even if it's an error message
61
+ with open(output_path, "w", encoding="utf-8") as output_file:
62
+ output_file.write(error_content)
63
+ return error_content
64
+
65
+
66
+ if not original_code.strip():
67
+ with open(output_path, "w", encoding="utf-8") as output_file:
68
+ output_file.write("") # Write empty if original is empty
69
+ return ""
70
+
71
+ formatted_prompt = PROMPT.format(code=original_code)
72
+ updated_code = llm.generate_content(formatted_prompt) # Use the llm instance
73
+
74
+ # Check if LLM returned an error message or empty content
75
+ # The llm.generate_content itself should return "❌ ..." on failure
76
+ if updated_code.startswith("❌") or not updated_code.strip():
77
+ print(f"LLM failed to generate documented code for {input_path}. Using original code. LLM Output: {updated_code}")
78
+ # Fallback: write original code to output path if LLM fails significantly
79
+ with open(output_path, "w", encoding="utf-8") as output_file:
80
+ output_file.write(original_code)
81
+ # Return original code so the rest of the process can continue with undoc'd code
82
+ return original_code
83
 
84
  with open(output_path, "w", encoding="utf-8") as output_file:
85
  output_file.write(updated_code)
86
 
87
  return updated_code
88
 
89
+ def extract_imports_from_file(file_path: str) -> set:
90
 
 
 
 
91
 
92
+ """
93
+ Extracts imported module names from a Python file.
94
  Args:
95
+ file_path: The path to the Python file.
96
 
97
  Returns:
98
+ A set of top-level imported module names.
99
  """
100
+ imports = set()
101
  try:
102
+ with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
103
+ source_code = f.read()
104
+ if not source_code.strip():
105
+ return imports
106
+ try:
107
+ tree = ast.parse(source_code)
108
+ except SyntaxError:
109
+ return imports
110
+ except OSError:
111
+ return imports
112
+
113
+
114
 
 
115
  for node in ast.walk(tree):
116
  if isinstance(node, ast.Import):
117
  for alias in node.names:
 
 
 
118
  imports.add(node.module.split('.')[0])
119
  return imports
120
 
121
+ def is_std_lib(module_name: str) -> bool:
 
122
  """
123
+ Checks if a module name is part of the Python standard library.
 
124
  Args:
125
+ module_name: The name of the module.
126
 
127
  Returns:
128
+ True if the module is standard library, False otherwise.
129
  """
130
+ if not module_name:
131
+ return False
132
  if module_name in sys.builtin_module_names:
133
  return True
134
+ try:
135
+ spec = importlib.util.find_spec(module_name)
136
+ if spec and spec.origin:
137
+ origin_lower = spec.origin.lower()
138
+ # More robust check might be needed for all edge cases (e.g. frozen modules)
139
+ # but this covers common scenarios.
140
+ return "site-packages" not in origin_lower and "dist-packages" not in origin_lower
141
+ return False
142
+ except (ModuleNotFoundError, ImportError, AttributeError):
143
+ return False
144
+
145
+ def generate_requirements_txt(base_path: str, output_path: str):
146
  """
147
+ Generates a requirements.txt file by scanning Python files in a directory
148
+ for external (non-standard library, non-local) imports.
149
+ This function does NOT use the LLM.
150
 
151
  Args:
152
+ base_path: The root directory of the repository to scan.
153
+ output_path: The path where requirements.txt will be saved.
154
  """
155
  all_imports = set()
156
  local_modules = set()
157
 
158
+ ignore_dirs_set = {
159
+ '.git', '__pycache__', 'node_modules', 'venv', '.venv', 'env',
160
+ '.vscode', '.idea', 'build', 'dist', 'docs', 'tests', 'test',
161
+ 'examples', 'example', 'data', 'static', 'templates', 'assets', 'img', 'images', 'logs',
162
+ 'migrations', 'coverage'
163
+ }
164
+
165
+ for root, dirs, files in os.walk(base_path, topdown=True):
166
+ dirs[:] = [d for d in dirs if d not in ignore_dirs_set and not d.startswith('.')]
167
+
168
  for file in files:
169
  if file.endswith(".py"):
170
  module_name = os.path.splitext(file)[0]
171
  local_modules.add(module_name)
172
+ if file == "__init__.py":
173
+ package_name = os.path.basename(root)
174
+ if package_name and package_name not in ignore_dirs_set and not package_name.startswith('.'):
175
+ local_modules.add(package_name)
176
 
177
+ for root, dirs, files in os.walk(base_path, topdown=True):
178
+ dirs[:] = [d for d in dirs if d not in ignore_dirs_set and not d.startswith('.')]
179
  for file in files:
180
  if file.endswith(".py"):
181
  file_path = os.path.join(root, file)
182
  all_imports.update(extract_imports_from_file(file_path))
183
 
184
+ external_imports = sorted(list(set(
185
+
186
  imp for imp in all_imports
187
+ if imp and imp not in local_modules and not is_std_lib(imp)
188
+ )))
189
+
190
 
 
191
  with open(output_path, "w", encoding="utf-8") as f:
192
+ if not external_imports:
193
+ f.write("# No external Python dependencies found (or all are standard libraries/local modules).\n")
194
+ else:
195
+ for package in external_imports:
196
+ f.write(f"{package.lower()}\n")
index.md CHANGED
@@ -1,10 +1,14 @@
1
- πŸ“ repo/
 
 
2
  β”œβ”€β”€ .well-known/
3
- β”‚ β”œβ”€β”€ mcp.yaml
4
- β”œβ”€β”€ app.py ← Gradio + MCP server
 
5
  β”œβ”€β”€ doc_generator.py
 
 
6
  β”œβ”€β”€ mcp_server.py
7
- β”œβ”€β”€ readme_generator.py
8
- β”œβ”€β”€ requirements.txt
9
  β”œβ”€β”€ README.md
10
- └── index.md
 
 
1
+ πŸ“ AutoDocs_Project/
2
+ β”œβ”€β”€ .env.example
3
+ β”œβ”€β”€ .gitignore
4
  β”œβ”€β”€ .well-known/
5
+ β”‚ └── mcp.yaml
6
+ β”œβ”€β”€ app.py
7
+ β”œβ”€β”€ ask_agent.py
8
  β”œβ”€β”€ doc_generator.py
9
+ β”œβ”€β”€ index.md ← This file (placeholder in source, generated in output)
10
+ β”œβ”€β”€ llm_interface.py
11
  β”œβ”€β”€ mcp_server.py
 
 
12
  β”œβ”€β”€ README.md
13
+ β”œβ”€β”€ readme_generator.py
14
+ └── requirements.txt