The0eau committed on
Commit
dbdbd51
·
1 Parent(s): 43f6bd8
Files changed (4) hide show
  1. app.py +112 -293
  2. ask_agent.py +37 -103
  3. doc_generator.py +80 -135
  4. readme_generator.py +6 -4
app.py CHANGED
@@ -1,325 +1,144 @@
1
  import gradio as gr
2
  import os
3
  import shutil
4
- import subprocess
5
  import tempfile
6
  import zipfile
7
-
8
  import uuid
9
- from functools import partial
10
-
11
- # Import get_llm, but other modules will call it with current provider state
12
- from llm_interface import get_llm
13
- from ask_agent import ask_agent # ask_agent will call get_llm()
14
- from doc_generator import generate_documented_code, generate_requirements_txt # these too
15
- from readme_generator import generate_readme_from_zip # and this
16
-
17
- # Helper to get current LLM based on UI state
18
- # This is NOT how get_llm should be used directly by the modules.
19
- # Instead, the modules call get_llm() which now can take UI selected provider.
20
- # The `current_llm_provider_state` and `hf_endpoint_state` will be passed to `get_llm()`
21
- # from the functions that are directly invoked by Gradio events.
22
-
23
- def process_repo(repo_path, zip_output_name="AutoDocs",
24
- llm_provider_ui: str = None, hf_endpoint_ui: str = None,
25
- google_api_key_ui: str = None, hf_api_key_ui: str = None): # Pass UI choices
26
- """
27
- Processes a repository. Now calls get_llm with UI selected provider.
28
- """
29
- # Note: generate_documented_code, etc., will call get_llm() internally.
30
- # We need to ensure get_llm() can pick up these UI-set values.
31
- # This requires a change in how get_llm() is called or how state is managed globally.
32
- # For simplicity here, we're assuming the modules (doc_generator, etc.) will call
33
- # get_llm() and it will use the latest state (which is tricky with just env vars).
34
- # A better approach: pass the llm_instance to these functions.
35
- # OR: Make get_llm() aware of Gradio state (not ideal).
36
-
37
- # Let's make the processing functions accept the llm_provider and hf_endpoint
38
- # and they will pass it to get_llm when they need an LLM instance.
39
-
40
-
41
-
42
-
43
-
44
-
45
-
46
 
 
 
 
47
 
 
48
 
 
49
  with tempfile.TemporaryDirectory() as temp_output_dir:
50
- processed_repo_path = os.path.join(temp_output_dir, "processed_repo")
51
- shutil.copytree(repo_path, processed_repo_path)
52
-
53
- for root, _, files in os.walk(processed_repo_path):
54
  for file in files:
55
  if file.endswith(".py"):
56
  file_path = os.path.join(root, file)
57
- # Pass provider info to functions that use LLM
58
- generate_documented_code(file_path, file_path,
59
- llm_provider=llm_provider_ui,
60
- hf_endpoint=hf_endpoint_ui,
61
- hf_api_key=hf_api_key_ui,
62
- google_api_key=google_api_key_ui)
63
-
64
-
65
- requirements_path = os.path.join(processed_repo_path, "requirements.txt")
66
- generate_requirements_txt(processed_repo_path, requirements_path) # Does not use LLM
67
-
68
- with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp_zip_for_readme:
69
- zip_path_for_readme = tmp_zip_for_readme.name
70
- with zipfile.ZipFile(zip_path_for_readme, "w", zipfile.ZIP_DEFLATED) as zipf:
71
- for r, _, fs in os.walk(processed_repo_path):
72
- for f_item in fs:
73
- full_p = os.path.join(r, f_item)
74
- arcname = os.path.relpath(full_p, processed_repo_path)
75
- zipf.write(full_p, arcname)
76
-
77
- with tempfile.TemporaryDirectory() as readme_out_dir:
78
- # Pass provider info
79
- readme_path, index_path = generate_readme_from_zip(
80
- zip_path_for_readme, readme_out_dir,
81
- llm_provider=llm_provider_ui,
82
- hf_endpoint=hf_endpoint_ui,
83
- hf_api_key=hf_api_key_ui,
84
- google_api_key=google_api_key_ui
85
- )
86
- shutil.copy2(readme_path, os.path.join(processed_repo_path, "README.md"))
87
- shutil.copy2(index_path, os.path.join(processed_repo_path, "index.md"))
88
-
89
- os.remove(zip_path_for_readme)
90
-
91
- output_zip_filename = f"{zip_output_name}_{uuid.uuid4().hex[:8]}.zip"
92
- output_zip_path = os.path.join(tempfile.gettempdir(), output_zip_filename)
93
-
94
-
95
-
96
-
97
-
98
  with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
99
- for r, _, fs in os.walk(processed_repo_path):
100
- for f_item in fs:
101
- full_p = os.path.join(r, f_item)
102
- arcname = os.path.relpath(full_p, processed_repo_path)
103
- zipf.write(full_p, arcname)
104
-
105
-
106
  return output_zip_path
107
 
108
- # --- Core processing functions now accept LLM provider details ---
109
- def actual_process_zip_upload(uploaded_zip_file, progress_tracker,
110
- llm_provider_ui, hf_endpoint_ui, google_api_key_ui, hf_api_key_ui):
111
- progress_tracker(0, desc="Starting upload processing...")
112
  zip_path = uploaded_zip_file.name
113
- zip_name = os.path.splitext(os.path.basename(zip_path))[0]
114
 
115
  with tempfile.TemporaryDirectory() as temp_input_dir:
116
- progress_tracker(0.1, desc="Unzipping repository...")
117
- with zipfile.ZipFile(zip_path, "r") as zip_ref:
118
-
119
  zip_ref.extractall(temp_input_dir)
120
- extracted_items = os.listdir(temp_input_dir)
121
- repo_root = temp_input_dir
122
- if len(extracted_items) == 1 and os.path.isdir(os.path.join(temp_input_dir, extracted_items[0])):
123
- repo_root = os.path.join(temp_input_dir, extracted_items[0])
124
- progress_tracker(0.3, desc="Generating documentation...")
125
- return process_repo(repo_root, zip_name, llm_provider_ui, hf_endpoint_ui, google_api_key_ui, hf_api_key_ui)
126
 
127
- def actual_process_github_clone(github_url, progress_tracker,
128
- llm_provider_ui, hf_endpoint_ui, google_api_key_ui, hf_api_key_ui):
129
- if not github_url or not (github_url.startswith("https://") or github_url.startswith("http://")):
130
- return "❌ Please provide a valid GitHub URL.", ""
131
- progress_tracker(0, desc="Cloning GitHub repository...")
132
- repo_name_from_url = github_url.split("/")[-1].replace(".git", "")
133
  with tempfile.TemporaryDirectory() as clone_dir:
134
  try:
135
- subprocess.check_call(["git", "clone", "--depth", "1", github_url, clone_dir])
136
- progress_tracker(0.3, desc="Generating documentation...")
137
- return process_repo(clone_dir, repo_name_from_url, llm_provider_ui, hf_endpoint_ui, google_api_key_ui, hf_api_key_ui)
138
  except subprocess.CalledProcessError:
139
- return "❌ Error cloning the GitHub repository. Please check the URL.", ""
140
- except Exception as e:
141
- return f"❌ An unexpected error occurred: {e}", ""
142
-
143
- # --- Gradio UI event handler wrapper ---
144
- def process_and_update_state_handler(
145
- specific_processing_function, # e.g. actual_process_github_clone
146
- data_input, # e.g. github_url
147
- # LLM config from UI state:
148
- llm_provider_state: str,
149
- hf_endpoint_state: str,
150
- google_api_key_state: str,
151
- hf_api_key_state: str, # Added this
152
- progress=gr.Progress(track_tqdm=True)
153
- ):
154
- # Validate required fields based on provider
155
- if llm_provider_state == "GEMINI" and not google_api_key_state:
156
- return "❌ Google API Key is required for Gemini. Please set it in the LLM Configuration tab or .env file.", None
157
- if llm_provider_state == "HUGGINGFACE" and not hf_endpoint_state:
158
- return "❌ Hugging Face Model Endpoint is required. Please set it in the LLM Configuration tab or .env file.", None
159
-
160
- # Call the actual processing function with all necessary args
161
- result = specific_processing_function(
162
- data_input, progress,
163
- llm_provider_state, hf_endpoint_state,
164
- google_api_key_state, hf_api_key_state
165
- )
166
-
167
- if isinstance(result, tuple) and "❌" in result[0]: return result[0], None
168
- elif isinstance(result, str) and "❌" in result: return result, None
169
- elif isinstance(result, str) and os.path.exists(result): return result, result
170
- else: return f"❌ Unexpected result from processing: {result}", None
171
-
172
- # --- Gradio UI ---
173
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
174
- gr.Markdown("# 🤖 AutoDocs – Intelligent Documentation Generator")
175
-
176
-
177
-
178
-
179
-
180
 
181
- # --- State objects for LLM config ---
182
- # Initialize from .env or defaults
183
- default_provider = os.getenv("LLM_PROVIDER", "GEMINI").upper()
184
- default_hf_endpoint = os.getenv("HF_MODEL_ENDPOINT", "")
185
- default_google_api_key = os.getenv("GOOGLE_API_KEY", "")
186
- default_hf_api_key = os.getenv("HF_API_KEY", "")
187
-
188
-
189
- current_llm_provider_state = gr.State(default_provider)
190
- current_hf_endpoint_state = gr.State(default_hf_endpoint)
191
- current_google_api_key_state = gr.State(default_google_api_key)
192
- current_hf_api_key_state = gr.State(default_hf_api_key) # For user to input if needed for HF
193
-
194
- last_processed_repo_path_state = gr.State(None)
195
-
196
- # --- LLM Configuration Tab ---
197
- with gr.Tab("⚙️ LLM Configuration"):
198
- gr.Markdown("Configure your preferred Language Model provider. Settings here override `.env` file values for the current session.")
199
-
200
- selected_provider_radio = gr.Radio(
201
- ["GEMINI", "HUGGINGFACE"],
202
- label="Select LLM Provider",
203
- value=default_provider
204
- )
205
-
206
- # Gemini specific inputs
207
- with gr.Group(visible=(default_provider == "GEMINI")) as gemini_config_group:
208
- gr.Markdown("### Gemini Configuration")
209
- google_api_key_input = gr.Textbox(
210
- label="Google API Key",
211
- placeholder="Enter your Google API Key (starts with 'AIzaSy...')",
212
- value=default_google_api_key,
213
- type="password"
214
- )
215
-
216
- # Hugging Face specific inputs
217
- with gr.Group(visible=(default_provider == "HUGGINGFACE")) as hf_config_group:
218
- gr.Markdown("### Hugging Face Configuration")
219
- hf_endpoint_input = gr.Textbox(
220
- label="Hugging Face Model Endpoint URL",
221
- placeholder="e.g., https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2",
222
- value=default_hf_endpoint
223
- )
224
- hf_api_key_input = gr.Textbox( # Added HF API Key input
225
- label="Hugging Face API Key (Optional)",
226
- placeholder="Enter your Hugging Face API Key (starts with 'hf_') if needed",
227
- value=default_hf_api_key,
228
- type="password"
229
- )
230
-
231
- # Update visibility of config groups based on radio selection
232
- def toggle_config_visibility(provider_choice):
233
- is_gemini = provider_choice == "GEMINI"
234
- is_hf = provider_choice == "HUGGINGFACE"
235
- return {
236
- gemini_config_group: gr.update(visible=is_gemini),
237
- hf_config_group: gr.update(visible=is_hf),
238
- # Update state variables
239
- current_llm_provider_state: provider_choice
240
- }
241
-
242
- selected_provider_radio.change(
243
- fn=toggle_config_visibility,
244
- inputs=[selected_provider_radio],
245
- outputs=[gemini_config_group, hf_config_group, current_llm_provider_state]
246
  )
247
 
248
- # Update state when text inputs change
249
- hf_endpoint_input.change(lambda x: x, inputs=[hf_endpoint_input], outputs=[current_hf_endpoint_state])
250
- google_api_key_input.change(lambda x: x, inputs=[google_api_key_input], outputs=[current_google_api_key_state])
251
- hf_api_key_input.change(lambda x: x, inputs=[hf_api_key_input], outputs=[current_hf_api_key_state])
252
-
253
-
254
- # --- Processing Tabs (GitHub, ZIP) ---
255
- with gr.Tab("🌐 Process from GitHub URL"):
256
- github_url_input = gr.Text(label="GitHub Repository URL", placeholder="https://github.com/gradio-app/gradio")
257
- generate_btn_git = gr.Button("📄 Generate from GitHub", variant="primary")
258
- output_zip_git = gr.File(label="⬇️ Download Your Documented Repo (.zip)")
259
-
260
- with gr.Tab("📦 Process from .zip upload"):
261
- zip_file_input = gr.File(label="Upload a .zip file of your repository", file_types=['.zip'])
262
- generate_btn_zip = gr.Button("📄 Generate from ZIP", variant="primary")
263
- output_zip_zip = gr.File(label="⬇️ Download Your Documented Repo (.zip)")
264
-
265
- # --- Chat Tab ---
266
- with gr.Tab("🧠 Ask a Question about the Repo"):
267
- with gr.Column():
268
- gr.Markdown("Once you've processed a repository, you can ask questions about its content here. Uses the LLM configured in 'LLM Configuration' tab.")
269
- chatbot = gr.Chatbot(label="Agent Chat", height=500)
270
- user_input_tb = gr.Textbox(placeholder="e.g., 'What does the main function in app.py do?'", show_label=False, container=False)
271
- send_btn = gr.Button("✉️ Send")
272
-
273
- # --- Click Handlers ---
274
- # Now pass all relevant state variables to the handler
275
- generate_btn_git.click(
276
- fn=partial(process_and_update_state_handler, actual_process_github_clone),
277
- inputs=[
278
- github_url_input,
279
- current_llm_provider_state,
280
- current_hf_endpoint_state,
281
- current_google_api_key_state,
282
- current_hf_api_key_state
283
- ],
284
- outputs=[output_zip_git, last_processed_repo_path_state],
285
- )
286
-
287
  generate_btn_zip.click(
288
- fn=partial(process_and_update_state_handler, actual_process_zip_upload),
289
- inputs=[
290
- zip_file_input,
291
- current_llm_provider_state,
292
- current_hf_endpoint_state,
293
- current_google_api_key_state,
294
- current_hf_api_key_state
295
- ],
296
- outputs=[output_zip_zip, last_processed_repo_path_state],
297
  )
298
 
299
- def handle_chat_submit(history, message, repo_path_state,
300
- provider_state, hf_endpoint_s, google_api_key_s, hf_api_key_s): # Add provider states
301
- if not message.strip(): return history, message
302
-
303
- # Pass provider info to ask_agent
304
- updated_history, _ = ask_agent(
305
- history, message, repo_path_state,
306
- llm_provider=provider_state,
307
- hf_endpoint=hf_endpoint_s,
308
- google_api_key=google_api_key_s,
309
- hf_api_key=hf_api_key_s
310
- )
311
- return updated_history, ""
312
-
313
- # Gather all necessary states for the chat handler
314
- chat_inputs = [
315
- chatbot, user_input_tb, last_processed_repo_path_state,
316
- current_llm_provider_state, current_hf_endpoint_state,
317
- current_google_api_key_state, current_hf_api_key_state
318
- ]
319
- user_input_tb.submit(fn=handle_chat_submit, inputs=chat_inputs, outputs=[chatbot, user_input_tb])
320
- send_btn.click(fn=handle_chat_submit, inputs=chat_inputs, outputs=[chatbot, user_input_tb])
321
 
322
  if __name__ == "__main__":
323
- from dotenv import load_dotenv
324
- load_dotenv() # Load .env for defaults, UI can override
325
- demo.queue().launch() # Removed share=True for local testing
 
1
  import gradio as gr
2
  import os
3
  import shutil
 
4
  import tempfile
5
  import zipfile
6
+ import subprocess
7
  import uuid
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ from ask_agent import ask_agent
10
+ from doc_generator import generate_documented_code, generate_requirements_txt
11
+ from readme_generator import generate_readme_from_zip
12
 
13
+ last_processed_repo_path = ""
14
 
15
+ def process_repo(repo_path, zip_output_name="AutoDocs"):
16
  with tempfile.TemporaryDirectory() as temp_output_dir:
17
+ # Document .py files
18
+ for root, _, files in os.walk(repo_path):
 
 
19
  for file in files:
20
  if file.endswith(".py"):
21
  file_path = os.path.join(root, file)
22
+ generate_documented_code(file_path, file_path)
23
+
24
+ # requirements.txt
25
+ requirements_path = os.path.join(repo_path, "requirements.txt")
26
+ generate_requirements_txt(repo_path, requirements_path)
27
+
28
+ # Create a temporary .zip for README/index
29
+ with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp_zip:
30
+ zip_path = tmp_zip.name
31
+ with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
32
+ for root, _, files in os.walk(repo_path):
33
+ for file in files:
34
+ full_path = os.path.join(root, file)
35
+ rel_path = os.path.relpath(full_path, repo_path)
36
+ zipf.write(full_path, rel_path)
37
+
38
+ # README + index.md
39
+ readme_path, index_path = generate_readme_from_zip(zip_path, temp_output_dir)
40
+
41
+ # Copy the processed repo
42
+ for item in os.listdir(repo_path):
43
+ s = os.path.join(repo_path, item)
44
+ d = os.path.join(temp_output_dir, item)
45
+ if os.path.isdir(s):
46
+ shutil.copytree(s, d, dirs_exist_ok=True)
47
+ else:
48
+ shutil.copy2(s, d)
49
+
50
+ dest_readme = os.path.join(temp_output_dir, "README.md")
51
+ dest_index = os.path.join(temp_output_dir, "index.md")
52
+
53
+ if os.path.abspath(readme_path) != os.path.abspath(dest_readme):
54
+ shutil.copy2(readme_path, dest_readme)
55
+ if os.path.abspath(index_path) != os.path.abspath(dest_index):
56
+ shutil.copy2(index_path, dest_index)
57
+
58
+ # Output zip file with consistent name
59
+ output_zip_path = os.path.join(
60
+ tempfile.gettempdir(), f"{zip_output_name}.zip"
61
+ )
 
62
  with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
63
+ for root, _, files in os.walk(temp_output_dir):
64
+ for file in files:
65
+ full_path = os.path.join(root, file)
66
+ arcname = os.path.relpath(full_path, temp_output_dir)
67
+ zipf.write(full_path, arcname)
68
+ global last_processed_repo_path
69
+ last_processed_repo_path = output_zip_path
70
  return output_zip_path
71
 
72
+ def process_zip_upload(uploaded_zip_file):
 
 
 
73
  zip_path = uploaded_zip_file.name
74
+ zip_name = os.path.splitext(os.path.basename(zip_path))[0] # e.g., my_project.zip → my_project
75
 
76
  with tempfile.TemporaryDirectory() as temp_input_dir:
77
+ input_zip_path = os.path.join(temp_input_dir, "input_repo.zip")
78
+ shutil.copy(zip_path, input_zip_path)
79
+ with zipfile.ZipFile(input_zip_path, "r") as zip_ref:
80
  zip_ref.extractall(temp_input_dir)
 
 
 
 
 
 
81
 
82
+ extracted_dirs = [d for d in os.listdir(temp_input_dir) if os.path.isdir(os.path.join(temp_input_dir, d))]
83
+ repo_root = os.path.join(temp_input_dir, extracted_dirs[0]) if extracted_dirs else temp_input_dir
84
+
85
+ return process_repo(repo_root, zip_name)
86
+
87
+ def process_github_clone(github_url):
88
  with tempfile.TemporaryDirectory() as clone_dir:
89
  try:
90
+ subprocess.check_call(["git", "clone", github_url, clone_dir])
91
+ return process_repo(clone_dir)
 
92
  except subprocess.CalledProcessError:
93
+ return "❌ Error cloning the GitHub repository. Please check the URL."
94
+
95
+ # Wrapper for process_zip_upload that also returns the path for the state
96
+ def process_zip_and_update_state(uploaded_zip_file):
97
+ zip_path = process_zip_upload(uploaded_zip_file)
98
+ return zip_path, zip_path # (output for gr.File, output for gr.State)
99
+
100
+ # Wrapper for process_github_clone as well
101
+ def process_git_and_update_state(github_url):
102
+ zip_path = process_github_clone(github_url)
103
+ return zip_path, zip_path
104
+
105
+ # Gradio user interface
106
+ with gr.Blocks() as demo:
107
+ gr.Markdown("# 🤖 AutoDocs – Smart Documentation Generator")
108
+ last_processed_repo_path_state = gr.State(value="")
109
+ with gr.Tab("📦 Upload .zip"):
110
+ zip_file_input = gr.File(label="Drop your repo .zip file here", file_types=['.zip'])
111
+ generate_btn_zip = gr.Button("📄 Generate from ZIP")
112
+ output_zip_zip = gr.File(label="⬇️ Download your documented repo")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
+ with gr.Tab("🌐 GitHub URL"):
115
+ github_url_input = gr.Text(label="Link to GitHub repository", placeholder="https://github.com/user/repo.git")
116
+ generate_btn_git = gr.Button("📄 Generate from GitHub")
117
+ output_zip_git = gr.File(label="⬇️ Download your documented repo")
118
+
119
+ with gr.Tab("🧠 Ask the agent about the repo"):
120
+ chatbot = gr.Chatbot()
121
+ user_input = gr.Textbox(placeholder="Ask your question here...")
122
+ send_btn = gr.Button("Send")
123
+
124
+ send_btn.click(
125
+ fn=ask_agent,
126
+ inputs=[chatbot, user_input, last_processed_repo_path_state],
127
+ outputs=[chatbot, user_input]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  )
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  generate_btn_zip.click(
131
+ fn=process_zip_and_update_state,
132
+ inputs=[zip_file_input],
133
+ outputs=[output_zip_zip, last_processed_repo_path_state]
 
 
 
 
 
 
134
  )
135
 
136
+ generate_btn_git.click(
137
+ fn=process_git_and_update_state,
138
+ inputs=[github_url_input],
139
+ outputs=[output_zip_git, last_processed_repo_path_state]
140
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  if __name__ == "__main__":
143
+ demo.queue()
144
+ demo.launch()
 
ask_agent.py CHANGED
@@ -1,128 +1,62 @@
1
  import os
2
  import tempfile
3
  import zipfile
4
- from typing import List, Tuple # Retain for Gradio history if not switching to 'messages' type
5
-
6
- from llm_interface import get_llm # Make sure this is imported
7
-
8
- def ask_agent(gradio_history: List[Tuple[str, str]],
9
- message: str,
10
- last_processed_repo_path: str,
11
- llm_provider: str = None,
12
- hf_endpoint: str = None,
13
- hf_api_key: str = None,
14
- google_api_key: str = None):
15
- """
16
- Handles a user's question about a processed repository using a conversational LLM.
17
-
18
- Args:
19
- gradio_history: The chat history from the Gradio chatbot component.
20
- message: The new message from the user.
21
- last_processed_repo_path: Path to the zip file of the last processed repo.
22
- llm_provider: The LLM provider chosen in the UI.
23
- hf_endpoint: The Hugging Face endpoint URL, if chosen.
24
- hf_api_key: The Hugging Face API key, if provided.
25
- google_api_key: The Google API key, if chosen.
26
-
27
- Returns:
28
- A tuple containing the updated Gradio history and an empty string for the textbox.
29
- """
30
- # Get LLM instance with current provider settings from UI/env
31
- llm = get_llm(provider=llm_provider,
32
- hf_endpoint=hf_endpoint,
33
- hf_api_key=hf_api_key,
34
- google_api_key=google_api_key)
35
-
36
- if not message or not message.strip():
37
- gradio_history.append((message, "Please enter a question."))
38
- return gradio_history, ""
39
 
 
 
 
 
40
 
 
41
 
42
  if not last_processed_repo_path or not os.path.exists(last_processed_repo_path):
43
- gradio_history.append((message, "📂 No repository has been processed yet. Please generate documentation for a repository first on the other tabs."))
44
- return gradio_history, ""
45
-
46
- if not zipfile.is_zipfile(last_processed_repo_path):
47
- gradio_history.append((message, f"❌ The stored path '{last_processed_repo_path}' is not a valid .zip file. Please re-process a repository."))
48
- return gradio_history, ""
49
 
50
-
51
- docs_and_code_content = ""
52
  with tempfile.TemporaryDirectory() as tmpdir:
53
- try:
54
- with zipfile.ZipFile(last_processed_repo_path, 'r') as zip_ref:
55
- zip_ref.extractall(tmpdir)
56
- except zipfile.BadZipFile:
57
- gradio_history.append((message, "❌ The processed repository file seems corrupted. Please re-process a repository."))
58
- return gradio_history, ""
59
- except Exception as e:
60
- gradio_history.append((message, f"❌ Error extracting the repository: {e}. Please re-process."))
61
- return gradio_history, ""
62
-
63
 
 
64
  extensions_docs = [".md", ".txt"]
65
- extensions_code = [".py", ".js", ".java", ".ts", ".cpp", ".c", ".cs", ".go", ".rb", ".swift", ".php", ".yml", ".yaml", ".json", ".xml", ".html", ".css", ".sh"]
66
 
67
  all_files = []
68
- extracted_items = os.listdir(tmpdir)
69
- repo_scan_root = tmpdir
70
- if len(extracted_items) == 1 and os.path.isdir(os.path.join(tmpdir, extracted_items[0])):
71
- repo_scan_root = os.path.join(tmpdir, extracted_items[0])
72
-
73
- for root, _, files in os.walk(repo_scan_root):
74
  for file in files:
75
- _, ext = os.path.splitext(file)
76
- if ext.lower() in extensions_docs or ext.lower() in extensions_code:
77
  all_files.append(os.path.join(root, file))
78
 
79
  if not all_files:
80
- gradio_history.append((message, "📄 No relevant documentation or code files were found in the processed repository. It might be empty or contain unsupported file types."))
81
- return gradio_history, ""
82
-
83
- MAX_CONTENT_CHARS = 30000
84
- current_chars = 0
85
-
86
-
87
 
 
 
88
  for file_path in all_files:
89
- if current_chars >= MAX_CONTENT_CHARS:
90
- docs_and_code_content += "\n\n===== [Content Truncated due to size limit] ====="
91
- break
92
  try:
93
- with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
94
- file_content = f.read(MAX_CONTENT_CHARS - current_chars)
95
- rel_path = os.path.relpath(file_path, repo_scan_root)
96
- content_to_add = f"\n\n===== File: {rel_path} =====\n\n{file_content}"
97
- docs_and_code_content += content_to_add
98
- current_chars += len(content_to_add)
99
  except Exception as e:
100
- error_msg = f"\n\n===== Error reading file {os.path.relpath(file_path, repo_scan_root)}: {e} =====\n\n"
101
- docs_and_code_content += error_msg
102
- current_chars += len(error_msg)
103
 
104
- if not docs_and_code_content.strip():
105
- gradio_history.append((message, "📄 Could not read content from any relevant files in the repository."))
106
- return gradio_history, ""
107
-
108
-
109
- current_turn_prompt = (
110
- f"You are a helpful AI assistant, an expert in understanding code and project structures. "
111
- f"Based ONLY on the following project content, answer the user's question. "
112
- f"If the answer cannot be found in the provided content, say so. Do not invent information.\n\n"
113
- f"--- Project Content ---\n{docs_and_code_content}\n--- End Project Content ---\n\n"
114
- f"User Question: {message}\n\n"
115
- f"Your Answer (be clear, concise, and stay strictly within the provided content):"
116
  )
117
-
118
- chat_session_obj = llm.start_chat_session(history=gradio_history)
119
- # Check if starting the session itself failed (e.g., due to API key issues reported by get_llm/LLMInterface stubs)
120
- if isinstance(chat_session_obj, str) and chat_session_obj.startswith("❌"):
121
- # The error message from start_chat_session (or the stub) is the response
122
- gradio_history.append((message, chat_session_obj))
123
- return gradio_history, ""
124
 
125
- answer = llm.send_chat_message(session=chat_session_obj, message=current_turn_prompt)
126
- gradio_history.append((message, answer))
 
 
 
 
 
 
127
 
128
- return gradio_history, ""
 
1
  import os
2
  import tempfile
3
  import zipfile
4
+ import google.generativeai as genai
5
+ from dotenv import load_dotenv
6
+ load_dotenv()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ API_KEY = os.getenv("GOOGLE_API_KEY")
9
+ genai.configure(api_key=API_KEY)
10
+ model = genai.GenerativeModel("models/gemini-2.0-flash")
11
+ chat_session = model.start_chat(history=[])
12
 
13
+ def ask_agent(history, message, last_processed_repo_path):
14
 
15
  if not last_processed_repo_path or not os.path.exists(last_processed_repo_path):
16
+ return history, "📂 No repository has been processed yet. Please generate documentation first."
 
 
 
 
 
17
 
 
 
18
  with tempfile.TemporaryDirectory() as tmpdir:
19
+ with zipfile.ZipFile(last_processed_repo_path, 'r') as zip_ref:
20
+ zip_ref.extractall(tmpdir)
 
 
 
 
 
 
 
 
21
 
22
+ # Extensions for docs and code to consider
23
  extensions_docs = [".md", ".txt"]
24
+ extensions_code = [".py", ".js", ".java", ".ts", ".cpp", ".c", ".cs", ".go", ".rb", ".swift", ".php"]
25
 
26
  all_files = []
27
+ for root, _, files in os.walk(tmpdir):
 
 
 
 
 
28
  for file in files:
29
+ ext = os.path.splitext(file)[1].lower()
30
+ if ext in extensions_docs or ext in extensions_code:
31
  all_files.append(os.path.join(root, file))
32
 
33
  if not all_files:
34
+ return history, "📄 No documentation or code files found in the generated zip."
 
 
 
 
 
 
35
 
36
+ # Read and concatenate content
37
+ docs_and_code_content = ""
38
  for file_path in all_files:
 
 
 
39
  try:
40
+ with open(file_path, "r", encoding="utf-8") as f:
41
+ file_content = f.read()
42
+ rel_path = os.path.relpath(file_path, tmpdir)
43
+ docs_and_code_content += f"\n\n===== File: {rel_path} =====\n\n"
44
+ docs_and_code_content += file_content
 
45
  except Exception as e:
46
+ docs_and_code_content += f"\n\n===== Error reading file {file_path}: {str(e)} =====\n\n"
 
 
47
 
48
+ prompt = (
49
+ f"Here is the content of the project (documentation and code):\n\n{docs_and_code_content}\n\n"
50
+ f"Question: {message}\n\nPlease respond clearly and precisely."
 
 
 
 
 
 
 
 
 
51
  )
 
 
 
 
 
 
 
52
 
53
+ try:
54
+ response = chat_session.send_message(prompt)
55
+ answer = response.text
56
+ except Exception as e:
57
+ answer = f"❌ Error when calling Gemini: {str(e)}"
58
+
59
+ history = history or []
60
+ history.append((message, answer))
61
 
62
+ return history, ""
doc_generator.py CHANGED
@@ -1,196 +1,141 @@
1
- import ast
2
-
3
-
4
- import importlib.util
5
  import os
 
 
6
  import sys
 
7
 
8
- from llm_interface import get_llm # Make sure this is imported
9
-
10
-
11
-
12
-
13
-
14
 
 
 
 
 
 
15
 
16
  PROMPT = """You are an expert programming assistant.
17
- For the following Python code, perform the following actions:
18
- - The code structure (classes, functions, arguments, order) must remain exactly the same.
19
- - Add clear comments for each important step or complex logic.
20
- - Add Google-style docstrings for all classes and functions. Docstrings should explain the purpose, arguments (with types if possible from context), and what the function/method returns (if anything, with type).
21
- - Add type annotations for function arguments and return types. If a type is complex or unknown, use `typing.Any` or a descriptive placeholder if appropriate.
22
-
23
- Respond ONLY with the updated Python code. Do not add any explanatory text before or after the code block.
24
- Ensure the output is a single, valid Python code block.
25
-
26
-
27
  Here is the code:
28
- ```python
29
  {code}
30
  """
31
- def generate_documented_code(input_path: str, output_path: str,
32
- llm_provider: str = None, hf_endpoint: str = None,
33
- hf_api_key: str = None, google_api_key: str = None) -> str:
34
  """
35
- Generates documented code for a given file using the configured LLM.
 
36
  Args:
37
- input_path: Path to the Python file to document.
38
- output_path: Path where the documented Python file will be saved.
39
- llm_provider: The LLM provider chosen in the UI (e.g., "GEMINI", "HUGGINGFACE").
40
- hf_endpoint: The Hugging Face endpoint URL, if chosen.
41
- hf_api_key: The Hugging Face API key, if provided.
42
- google_api_key: The Google API key, if chosen.
43
-
44
-
45
  Returns:
46
- The content of the documented code, or the original code/error message on failure.
47
  """
48
- # Get LLM instance with current provider settings from UI/env
49
- llm = get_llm(provider=llm_provider,
50
- hf_endpoint=hf_endpoint,
51
- hf_api_key=hf_api_key,
52
- google_api_key=google_api_key)
53
 
54
- try:
55
- with open(input_path, "r", encoding="utf-8") as f:
56
- original_code = f.read()
57
- except Exception as e:
58
- print(f"Error reading input file {input_path}: {e}")
59
- error_content = f"# Error reading input file: {e}\n"
60
- # Still write to output_path so the file exists for zipping, even if it's an error message
61
- with open(output_path, "w", encoding="utf-8") as output_file:
62
- output_file.write(error_content)
63
- return error_content
64
-
65
-
66
- if not original_code.strip():
67
- with open(output_path, "w", encoding="utf-8") as output_file:
68
- output_file.write("") # Write empty if original is empty
69
- return ""
70
-
71
- formatted_prompt = PROMPT.format(code=original_code)
72
- updated_code = llm.generate_content(formatted_prompt) # Use the llm instance
73
-
74
- # Check if LLM returned an error message or empty content
75
- # The llm.generate_content itself should return "❌ ..." on failure
76
- if updated_code.startswith("❌") or not updated_code.strip():
77
- print(f"LLM failed to generate documented code for {input_path}. Using original code. LLM Output: {updated_code}")
78
- # Fallback: write original code to output path if LLM fails significantly
79
- with open(output_path, "w", encoding="utf-8") as output_file:
80
- output_file.write(original_code)
81
- # Return original code so the rest of the process can continue with undoc'd code
82
- return original_code
83
 
84
  with open(output_path, "w", encoding="utf-8") as output_file:
85
  output_file.write(updated_code)
86
 
87
  return updated_code
88
 
89
- def extract_imports_from_file(file_path: str) -> set:
90
-
91
 
 
92
  """
93
- Extracts imported module names from a Python file.
 
94
  Args:
95
- file_path: The path to the Python file.
96
 
97
  Returns:
98
- A set of top-level imported module names.
99
  """
100
- imports = set()
101
  try:
102
- with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
103
- source_code = f.read()
104
- if not source_code.strip():
105
- return imports
106
- try:
107
- tree = ast.parse(source_code)
108
- except SyntaxError:
109
- return imports
110
- except OSError:
111
- return imports
112
-
113
-
114
 
 
115
  for node in ast.walk(tree):
116
  if isinstance(node, ast.Import):
117
  for alias in node.names:
 
 
 
118
  imports.add(node.module.split('.')[0])
119
  return imports
120
 
121
- def is_std_lib(module_name: str) -> bool:
 
122
  """
123
- Checks if a module name is part of the Python standard library.
 
124
  Args:
125
- module_name: The name of the module.
126
 
127
  Returns:
128
- True if the module is standard library, False otherwise.
129
  """
130
- if not module_name:
131
- return False
132
  if module_name in sys.builtin_module_names:
133
  return True
134
- try:
135
- spec = importlib.util.find_spec(module_name)
136
- if spec and spec.origin:
137
- origin_lower = spec.origin.lower()
138
- # More robust check might be needed for all edge cases (e.g. frozen modules)
139
- # but this covers common scenarios.
140
- return "site-packages" not in origin_lower and "dist-packages" not in origin_lower
141
- return False
142
- except (ModuleNotFoundError, ImportError, AttributeError):
143
- return False
144
-
145
- def generate_requirements_txt(base_path: str, output_path: str):
146
  """
147
- Generates a requirements.txt file by scanning Python files in a directory
148
- for external (non-standard library, non-local) imports.
149
- This function does NOT use the LLM.
150
 
151
  Args:
152
- base_path: The root directory of the repository to scan.
153
- output_path: The path where requirements.txt will be saved.
154
  """
155
  all_imports = set()
156
  local_modules = set()
157
 
158
- ignore_dirs_set = {
159
- '.git', '__pycache__', 'node_modules', 'venv', '.venv', 'env',
160
- '.vscode', '.idea', 'build', 'dist', 'docs', 'tests', 'test',
161
- 'examples', 'example', 'data', 'static', 'templates', 'assets', 'img', 'images', 'logs',
162
- 'migrations', 'coverage'
163
- }
164
-
165
- for root, dirs, files in os.walk(base_path, topdown=True):
166
- dirs[:] = [d for d in dirs if d not in ignore_dirs_set and not d.startswith('.')]
167
-
168
  for file in files:
169
  if file.endswith(".py"):
170
  module_name = os.path.splitext(file)[0]
171
  local_modules.add(module_name)
172
- if file == "__init__.py":
173
- package_name = os.path.basename(root)
174
- if package_name and package_name not in ignore_dirs_set and not package_name.startswith('.'):
175
- local_modules.add(package_name)
176
 
177
- for root, dirs, files in os.walk(base_path, topdown=True):
178
- dirs[:] = [d for d in dirs if d not in ignore_dirs_set and not d.startswith('.')]
179
  for file in files:
180
  if file.endswith(".py"):
181
  file_path = os.path.join(root, file)
182
  all_imports.update(extract_imports_from_file(file_path))
183
 
184
- external_imports = sorted(list(set(
185
-
186
  imp for imp in all_imports
187
- if imp and imp not in local_modules and not is_std_lib(imp)
188
- )))
189
-
190
 
 
191
  with open(output_path, "w", encoding="utf-8") as f:
192
- if not external_imports:
193
- f.write("# No external Python dependencies found (or all are standard libraries/local modules).\n")
194
- else:
195
- for package in external_imports:
196
- f.write(f"{package.lower()}\n")
 
1
+ import google.generativeai as genai
2
+ import re
 
 
3
  import os
4
+ import ast
5
+ from dotenv import load_dotenv
6
  import sys
7
+ import importlib.util
8
 
9
+ load_dotenv()
 
 
 
 
 
10
 
11
+ API_KEY = os.getenv("GOOGLE_API_KEY")
12
+ if API_KEY is None:
13
+ raise ValueError("⚠️ The API key MY_API_KEY is missing! Check the Secrets in Hugging Face.")
14
+ genai.configure(api_key=API_KEY)
15
+ model = genai.GenerativeModel("models/gemini-2.0-flash")
16
 
17
  PROMPT = """You are an expert programming assistant.
18
+ For the following code, perform the following actions:
19
+ - The code must remain exactly the same
20
+ - Add clear comments for each important step.
21
+ - Rename variables if it makes the code easier to understand.
22
+ - Add type annotations if the language supports it.
23
+ - For each function, add a Google-style docstring (or equivalent format depending on the language).
24
+
25
+ Respond only with the updated code, no explanation.
 
 
26
  Here is the code:
27
+
28
  {code}
29
  """
30
+
31
+ def generate_documented_code(input_path: str, output_path: str) -> str:
 
32
  """
33
+ Generate a documented version of the code from the given input file and save it to the output file.
34
+
35
  Args:
36
+ input_path (str): Path to the original code file.
37
+ output_path (str): Path where the documented code will be saved.
38
+
 
 
 
 
 
39
  Returns:
40
+ str: The updated and documented code.
41
  """
42
+ with open(input_path, "r", encoding="utf-8") as f:
43
+ original_code = f.read()
 
 
 
44
 
45
+ prompt = PROMPT.format(code=original_code)
46
+ response = model.generate_content(prompt)
47
+ updated_code = response.text.strip()
48
+
49
+ # Clean up Markdown blocks if present
50
+ lines = updated_code.splitlines()
51
+ if len(lines) > 2:
52
+ lines = lines[1:-1] # remove the first and last lines
53
+ updated_code = "\n".join(lines)
54
+ else:
55
+ # if less than 3 lines, clear everything or keep as is depending on needs
56
+ updated_code = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  with open(output_path, "w", encoding="utf-8") as output_file:
59
  output_file.write(updated_code)
60
 
61
  return updated_code
62
 
 
 
63
 
64
+ def extract_imports_from_file(file_path):
65
  """
66
+ Extract imported modules from a Python file to generate requirements.txt.
67
+
68
  Args:
69
+ file_path (str): Path to the Python file.
70
 
71
  Returns:
72
+ set: A set of imported module names.
73
  """
 
74
  try:
75
+ with open(file_path, "r", encoding="utf-8") as f:
76
+ tree = ast.parse(f.read())
77
+ except SyntaxError:
78
+ return set()
 
 
 
 
 
 
 
 
79
 
80
+ imports = set()
81
  for node in ast.walk(tree):
82
  if isinstance(node, ast.Import):
83
  for alias in node.names:
84
+ imports.add(alias.name.split('.')[0])
85
+ elif isinstance(node, ast.ImportFrom):
86
+ if node.module and not node.module.startswith("."):
87
  imports.add(node.module.split('.')[0])
88
  return imports
89
 
90
+
91
+ def is_std_lib(module_name):
92
  """
93
+ Check if a module is part of the Python standard library.
94
+
95
  Args:
96
+ module_name (str): The name of the module.
97
 
98
  Returns:
99
+ bool: True if the module is part of the standard library, False otherwise.
100
  """
 
 
101
  if module_name in sys.builtin_module_names:
102
  return True
103
+ spec = importlib.util.find_spec(module_name)
104
+ return spec is not None and "site-packages" not in (spec.origin or "")
105
+
106
+
107
+ def generate_requirements_txt(base_path, output_path):
 
 
 
 
 
 
 
108
  """
109
+ Generate a requirements.txt file based on external imports found in Python files.
 
 
110
 
111
  Args:
112
+ base_path (str): Root directory of the codebase.
113
+ output_path (str): Path to save the generated requirements.txt file.
114
  """
115
  all_imports = set()
116
  local_modules = set()
117
 
118
+ # Get names of internal modules (i.e., .py files in the repo)
119
+ for root, _, files in os.walk(base_path):
 
 
 
 
 
 
 
 
120
  for file in files:
121
  if file.endswith(".py"):
122
  module_name = os.path.splitext(file)[0]
123
  local_modules.add(module_name)
 
 
 
 
124
 
125
+ # Extract all imports used in the project
126
+ for root, _, files in os.walk(base_path):
127
  for file in files:
128
  if file.endswith(".py"):
129
  file_path = os.path.join(root, file)
130
  all_imports.update(extract_imports_from_file(file_path))
131
 
132
+ # Remove internal modules and standard library modules
133
+ external_imports = sorted([
134
  imp for imp in all_imports
135
+ if imp not in local_modules and not is_std_lib(imp)
136
+ ])
 
137
 
138
+ # Write the requirements.txt file
139
  with open(output_path, "w", encoding="utf-8") as f:
140
+ for package in external_imports:
141
+ f.write(f"{package}\n")
 
 
 
readme_generator.py CHANGED
@@ -64,13 +64,13 @@ def generate_readme_from_zip(zip_file_path: str, output_dir: str) -> (str, str):
64
  readme_path = os.path.join(output_dir, "README.md")
65
  index_path = os.path.join(output_dir, "index.md")
66
  os.makedirs(output_dir, exist_ok=True)
67
- # Clean markdown code blocks if they exist
68
  lines = readme_content.splitlines()
69
  if len(lines) > 2:
70
- lines = lines[1:-1] # remove the first and last lines
71
  readme_content = "\n".join(lines)
72
  else:
73
- # if less than 3 lines, empty or keep as needed
74
  readme_content = ""
75
 
76
  with open(readme_path, "w", encoding="utf-8") as f:
@@ -79,6 +79,7 @@ def generate_readme_from_zip(zip_file_path: str, output_dir: str) -> (str, str):
79
  # ✅ Generate index from tempdir (correct location of extracted files)
80
  write_index_file(tempdir, index_path)
81
 
 
82
  return readme_path, index_path
83
 
84
  def generate_tree_structure(path: str, prefix: str = "") -> str:
@@ -100,7 +101,7 @@ def generate_tree_structure(path: str, prefix: str = "") -> str:
100
  lines.extend(subtree.splitlines()[1:]) # skip repeated dir name
101
 
102
  lines.extend(["├── README.md",
103
- "└── index.md"])
104
 
105
  return "\n".join(lines)
106
 
@@ -109,3 +110,4 @@ def write_index_file(project_path: str, output_path: str):
109
  structure = generate_tree_structure(project_path)
110
  with open(output_path, "w", encoding="utf-8") as f:
111
  f.write(structure)
 
 
64
  readme_path = os.path.join(output_dir, "README.md")
65
  index_path = os.path.join(output_dir, "index.md")
66
  os.makedirs(output_dir, exist_ok=True)
67
+ # Nettoyer les blocs Markdown s'ils existent
68
  lines = readme_content.splitlines()
69
  if len(lines) > 2:
70
+ lines = lines[1:-1] # enlève la première et la dernière ligne
71
  readme_content = "\n".join(lines)
72
  else:
73
+ # si moins de 3 lignes, on vide tout ou on garde tel quel selon le besoin
74
  readme_content = ""
75
 
76
  with open(readme_path, "w", encoding="utf-8") as f:
 
79
  # ✅ Generate index from tempdir (correct location of extracted files)
80
  write_index_file(tempdir, index_path)
81
 
82
+
83
  return readme_path, index_path
84
 
85
  def generate_tree_structure(path: str, prefix: str = "") -> str:
 
101
  lines.extend(subtree.splitlines()[1:]) # skip repeated dir name
102
 
103
  lines.extend(["├── README.md",
104
+ "└── index.md"])
105
 
106
  return "\n".join(lines)
107
 
 
110
  structure = generate_tree_structure(project_path)
111
  with open(output_path, "w", encoding="utf-8") as f:
112
  f.write(structure)
113
+