The0eau committed on
Commit
dbdbd51
·
1 Parent(s): 43f6bd8
Files changed (4) hide show
  1. app.py +112 -293
  2. ask_agent.py +37 -103
  3. doc_generator.py +80 -135
  4. readme_generator.py +6 -4
app.py CHANGED
@@ -1,325 +1,144 @@
1
  import gradio as gr
2
  import os
3
  import shutil
4
- import subprocess
5
  import tempfile
6
  import zipfile
7
-
8
  import uuid
9
- from functools import partial
10
-
11
- # Import get_llm, but other modules will call it with current provider state
12
- from llm_interface import get_llm
13
- from ask_agent import ask_agent # ask_agent will call get_llm()
14
- from doc_generator import generate_documented_code, generate_requirements_txt # these too
15
- from readme_generator import generate_readme_from_zip # and this
16
-
17
- # Helper to get current LLM based on UI state
18
- # This is NOT how get_llm should be used directly by the modules.
19
- # Instead, the modules call get_llm() which now can take UI selected provider.
20
- # The `current_llm_provider_state` and `hf_endpoint_state` will be passed to `get_llm()`
21
- # from the functions that are directly invoked by Gradio events.
22
-
23
- def process_repo(repo_path, zip_output_name="AutoDocs",
24
- llm_provider_ui: str = None, hf_endpoint_ui: str = None,
25
- google_api_key_ui: str = None, hf_api_key_ui: str = None): # Pass UI choices
26
- """
27
- Processes a repository. Now calls get_llm with UI selected provider.
28
- """
29
- # Note: generate_documented_code, etc., will call get_llm() internally.
30
- # We need to ensure get_llm() can pick up these UI-set values.
31
- # This requires a change in how get_llm() is called or how state is managed globally.
32
- # For simplicity here, we're assuming the modules (doc_generator, etc.) will call
33
- # get_llm() and it will use the latest state (which is tricky with just env vars).
34
- # A better approach: pass the llm_instance to these functions.
35
- # OR: Make get_llm() aware of Gradio state (not ideal).
36
-
37
- # Let's make the processing functions accept the llm_provider and hf_endpoint
38
- # and they will pass it to get_llm when they need an LLM instance.
39
-
40
-
41
-
42
-
43
-
44
-
45
-
46
 
 
 
 
47
 
 
48
 
 
49
  with tempfile.TemporaryDirectory() as temp_output_dir:
50
- processed_repo_path = os.path.join(temp_output_dir, "processed_repo")
51
- shutil.copytree(repo_path, processed_repo_path)
52
-
53
- for root, _, files in os.walk(processed_repo_path):
54
  for file in files:
55
  if file.endswith(".py"):
56
  file_path = os.path.join(root, file)
57
- # Pass provider info to functions that use LLM
58
- generate_documented_code(file_path, file_path,
59
- llm_provider=llm_provider_ui,
60
- hf_endpoint=hf_endpoint_ui,
61
- hf_api_key=hf_api_key_ui,
62
- google_api_key=google_api_key_ui)
63
-
64
-
65
- requirements_path = os.path.join(processed_repo_path, "requirements.txt")
66
- generate_requirements_txt(processed_repo_path, requirements_path) # Does not use LLM
67
-
68
- with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp_zip_for_readme:
69
- zip_path_for_readme = tmp_zip_for_readme.name
70
- with zipfile.ZipFile(zip_path_for_readme, "w", zipfile.ZIP_DEFLATED) as zipf:
71
- for r, _, fs in os.walk(processed_repo_path):
72
- for f_item in fs:
73
- full_p = os.path.join(r, f_item)
74
- arcname = os.path.relpath(full_p, processed_repo_path)
75
- zipf.write(full_p, arcname)
76
-
77
- with tempfile.TemporaryDirectory() as readme_out_dir:
78
- # Pass provider info
79
- readme_path, index_path = generate_readme_from_zip(
80
- zip_path_for_readme, readme_out_dir,
81
- llm_provider=llm_provider_ui,
82
- hf_endpoint=hf_endpoint_ui,
83
- hf_api_key=hf_api_key_ui,
84
- google_api_key=google_api_key_ui
85
- )
86
- shutil.copy2(readme_path, os.path.join(processed_repo_path, "README.md"))
87
- shutil.copy2(index_path, os.path.join(processed_repo_path, "index.md"))
88
-
89
- os.remove(zip_path_for_readme)
90
-
91
- output_zip_filename = f"{zip_output_name}_{uuid.uuid4().hex[:8]}.zip"
92
- output_zip_path = os.path.join(tempfile.gettempdir(), output_zip_filename)
93
-
94
-
95
-
96
-
97
-
98
  with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
99
- for r, _, fs in os.walk(processed_repo_path):
100
- for f_item in fs:
101
- full_p = os.path.join(r, f_item)
102
- arcname = os.path.relpath(full_p, processed_repo_path)
103
- zipf.write(full_p, arcname)
104
-
105
-
106
  return output_zip_path
107
 
108
- # --- Core processing functions now accept LLM provider details ---
109
- def actual_process_zip_upload(uploaded_zip_file, progress_tracker,
110
- llm_provider_ui, hf_endpoint_ui, google_api_key_ui, hf_api_key_ui):
111
- progress_tracker(0, desc="Starting upload processing...")
112
  zip_path = uploaded_zip_file.name
113
- zip_name = os.path.splitext(os.path.basename(zip_path))[0]
114
 
115
  with tempfile.TemporaryDirectory() as temp_input_dir:
116
- progress_tracker(0.1, desc="Unzipping repository...")
117
- with zipfile.ZipFile(zip_path, "r") as zip_ref:
118
-
119
  zip_ref.extractall(temp_input_dir)
120
- extracted_items = os.listdir(temp_input_dir)
121
- repo_root = temp_input_dir
122
- if len(extracted_items) == 1 and os.path.isdir(os.path.join(temp_input_dir, extracted_items[0])):
123
- repo_root = os.path.join(temp_input_dir, extracted_items[0])
124
- progress_tracker(0.3, desc="Generating documentation...")
125
- return process_repo(repo_root, zip_name, llm_provider_ui, hf_endpoint_ui, google_api_key_ui, hf_api_key_ui)
126
 
127
- def actual_process_github_clone(github_url, progress_tracker,
128
- llm_provider_ui, hf_endpoint_ui, google_api_key_ui, hf_api_key_ui):
129
- if not github_url or not (github_url.startswith("https://") or github_url.startswith("http://")):
130
- return "❌ Please provide a valid GitHub URL.", ""
131
- progress_tracker(0, desc="Cloning GitHub repository...")
132
- repo_name_from_url = github_url.split("/")[-1].replace(".git", "")
133
  with tempfile.TemporaryDirectory() as clone_dir:
134
  try:
135
- subprocess.check_call(["git", "clone", "--depth", "1", github_url, clone_dir])
136
- progress_tracker(0.3, desc="Generating documentation...")
137
- return process_repo(clone_dir, repo_name_from_url, llm_provider_ui, hf_endpoint_ui, google_api_key_ui, hf_api_key_ui)
138
  except subprocess.CalledProcessError:
139
- return "❌ Error cloning the GitHub repository. Please check the URL.", ""
140
- except Exception as e:
141
- return f"❌ An unexpected error occurred: {e}", ""
142
-
143
- # --- Gradio UI event handler wrapper ---
144
- def process_and_update_state_handler(
145
- specific_processing_function, # e.g. actual_process_github_clone
146
- data_input, # e.g. github_url
147
- # LLM config from UI state:
148
- llm_provider_state: str,
149
- hf_endpoint_state: str,
150
- google_api_key_state: str,
151
- hf_api_key_state: str, # Added this
152
- progress=gr.Progress(track_tqdm=True)
153
- ):
154
- # Validate required fields based on provider
155
- if llm_provider_state == "GEMINI" and not google_api_key_state:
156
- return "❌ Google API Key is required for Gemini. Please set it in the LLM Configuration tab or .env file.", None
157
- if llm_provider_state == "HUGGINGFACE" and not hf_endpoint_state:
158
- return "❌ Hugging Face Model Endpoint is required. Please set it in the LLM Configuration tab or .env file.", None
159
-
160
- # Call the actual processing function with all necessary args
161
- result = specific_processing_function(
162
- data_input, progress,
163
- llm_provider_state, hf_endpoint_state,
164
- google_api_key_state, hf_api_key_state
165
- )
166
-
167
- if isinstance(result, tuple) and "❌" in result[0]: return result[0], None
168
- elif isinstance(result, str) and "❌" in result: return result, None
169
- elif isinstance(result, str) and os.path.exists(result): return result, result
170
- else: return f"❌ Unexpected result from processing: {result}", None
171
-
172
- # --- Gradio UI ---
173
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
174
- gr.Markdown("# 🤖 AutoDocs – Intelligent Documentation Generator")
175
-
176
-
177
-
178
-
179
-
180
 
181
- # --- State objects for LLM config ---
182
- # Initialize from .env or defaults
183
- default_provider = os.getenv("LLM_PROVIDER", "GEMINI").upper()
184
- default_hf_endpoint = os.getenv("HF_MODEL_ENDPOINT", "")
185
- default_google_api_key = os.getenv("GOOGLE_API_KEY", "")
186
- default_hf_api_key = os.getenv("HF_API_KEY", "")
187
-
188
-
189
- current_llm_provider_state = gr.State(default_provider)
190
- current_hf_endpoint_state = gr.State(default_hf_endpoint)
191
- current_google_api_key_state = gr.State(default_google_api_key)
192
- current_hf_api_key_state = gr.State(default_hf_api_key) # For user to input if needed for HF
193
-
194
- last_processed_repo_path_state = gr.State(None)
195
-
196
- # --- LLM Configuration Tab ---
197
- with gr.Tab("⚙️ LLM Configuration"):
198
- gr.Markdown("Configure your preferred Language Model provider. Settings here override `.env` file values for the current session.")
199
-
200
- selected_provider_radio = gr.Radio(
201
- ["GEMINI", "HUGGINGFACE"],
202
- label="Select LLM Provider",
203
- value=default_provider
204
- )
205
-
206
- # Gemini specific inputs
207
- with gr.Group(visible=(default_provider == "GEMINI")) as gemini_config_group:
208
- gr.Markdown("### Gemini Configuration")
209
- google_api_key_input = gr.Textbox(
210
- label="Google API Key",
211
- placeholder="Enter your Google API Key (starts with 'AIzaSy...')",
212
- value=default_google_api_key,
213
- type="password"
214
- )
215
-
216
- # Hugging Face specific inputs
217
- with gr.Group(visible=(default_provider == "HUGGINGFACE")) as hf_config_group:
218
- gr.Markdown("### Hugging Face Configuration")
219
- hf_endpoint_input = gr.Textbox(
220
- label="Hugging Face Model Endpoint URL",
221
- placeholder="e.g., https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2",
222
- value=default_hf_endpoint
223
- )
224
- hf_api_key_input = gr.Textbox( # Added HF API Key input
225
- label="Hugging Face API Key (Optional)",
226
- placeholder="Enter your Hugging Face API Key (starts with 'hf_') if needed",
227
- value=default_hf_api_key,
228
- type="password"
229
- )
230
-
231
- # Update visibility of config groups based on radio selection
232
- def toggle_config_visibility(provider_choice):
233
- is_gemini = provider_choice == "GEMINI"
234
- is_hf = provider_choice == "HUGGINGFACE"
235
- return {
236
- gemini_config_group: gr.update(visible=is_gemini),
237
- hf_config_group: gr.update(visible=is_hf),
238
- # Update state variables
239
- current_llm_provider_state: provider_choice
240
- }
241
-
242
- selected_provider_radio.change(
243
- fn=toggle_config_visibility,
244
- inputs=[selected_provider_radio],
245
- outputs=[gemini_config_group, hf_config_group, current_llm_provider_state]
246
  )
247
 
248
- # Update state when text inputs change
249
- hf_endpoint_input.change(lambda x: x, inputs=[hf_endpoint_input], outputs=[current_hf_endpoint_state])
250
- google_api_key_input.change(lambda x: x, inputs=[google_api_key_input], outputs=[current_google_api_key_state])
251
- hf_api_key_input.change(lambda x: x, inputs=[hf_api_key_input], outputs=[current_hf_api_key_state])
252
-
253
-
254
- # --- Processing Tabs (GitHub, ZIP) ---
255
- with gr.Tab("🌐 Process from GitHub URL"):
256
- github_url_input = gr.Text(label="GitHub Repository URL", placeholder="https://github.com/gradio-app/gradio")
257
- generate_btn_git = gr.Button("📄 Generate from GitHub", variant="primary")
258
- output_zip_git = gr.File(label="⬇️ Download Your Documented Repo (.zip)")
259
-
260
- with gr.Tab("📦 Process from .zip upload"):
261
- zip_file_input = gr.File(label="Upload a .zip file of your repository", file_types=['.zip'])
262
- generate_btn_zip = gr.Button("📄 Generate from ZIP", variant="primary")
263
- output_zip_zip = gr.File(label="⬇️ Download Your Documented Repo (.zip)")
264
-
265
- # --- Chat Tab ---
266
- with gr.Tab("🧠 Ask a Question about the Repo"):
267
- with gr.Column():
268
- gr.Markdown("Once you've processed a repository, you can ask questions about its content here. Uses the LLM configured in 'LLM Configuration' tab.")
269
- chatbot = gr.Chatbot(label="Agent Chat", height=500)
270
- user_input_tb = gr.Textbox(placeholder="e.g., 'What does the main function in app.py do?'", show_label=False, container=False)
271
- send_btn = gr.Button("✉️ Send")
272
-
273
- # --- Click Handlers ---
274
- # Now pass all relevant state variables to the handler
275
- generate_btn_git.click(
276
- fn=partial(process_and_update_state_handler, actual_process_github_clone),
277
- inputs=[
278
- github_url_input,
279
- current_llm_provider_state,
280
- current_hf_endpoint_state,
281
- current_google_api_key_state,
282
- current_hf_api_key_state
283
- ],
284
- outputs=[output_zip_git, last_processed_repo_path_state],
285
- )
286
-
287
  generate_btn_zip.click(
288
- fn=partial(process_and_update_state_handler, actual_process_zip_upload),
289
- inputs=[
290
- zip_file_input,
291
- current_llm_provider_state,
292
- current_hf_endpoint_state,
293
- current_google_api_key_state,
294
- current_hf_api_key_state
295
- ],
296
- outputs=[output_zip_zip, last_processed_repo_path_state],
297
  )
298
 
299
- def handle_chat_submit(history, message, repo_path_state,
300
- provider_state, hf_endpoint_s, google_api_key_s, hf_api_key_s): # Add provider states
301
- if not message.strip(): return history, message
302
-
303
- # Pass provider info to ask_agent
304
- updated_history, _ = ask_agent(
305
- history, message, repo_path_state,
306
- llm_provider=provider_state,
307
- hf_endpoint=hf_endpoint_s,
308
- google_api_key=google_api_key_s,
309
- hf_api_key=hf_api_key_s
310
- )
311
- return updated_history, ""
312
-
313
- # Gather all necessary states for the chat handler
314
- chat_inputs = [
315
- chatbot, user_input_tb, last_processed_repo_path_state,
316
- current_llm_provider_state, current_hf_endpoint_state,
317
- current_google_api_key_state, current_hf_api_key_state
318
- ]
319
- user_input_tb.submit(fn=handle_chat_submit, inputs=chat_inputs, outputs=[chatbot, user_input_tb])
320
- send_btn.click(fn=handle_chat_submit, inputs=chat_inputs, outputs=[chatbot, user_input_tb])
321
 
322
  if __name__ == "__main__":
323
- from dotenv import load_dotenv
324
- load_dotenv() # Load .env for defaults, UI can override
325
- demo.queue().launch() # Removed share=True for local testing
 
1
  import gradio as gr
2
  import os
3
  import shutil
 
4
  import tempfile
5
  import zipfile
6
+ import subprocess
7
  import uuid
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ from ask_agent import ask_agent
10
+ from doc_generator import generate_documented_code, generate_requirements_txt
11
+ from readme_generator import generate_readme_from_zip
12
 
13
+ last_processed_repo_path = ""
14
 
15
+ def process_repo(repo_path, zip_output_name="AutoDocs"):
16
  with tempfile.TemporaryDirectory() as temp_output_dir:
17
+ # Document .py files
18
+ for root, _, files in os.walk(repo_path):
 
 
19
  for file in files:
20
  if file.endswith(".py"):
21
  file_path = os.path.join(root, file)
22
+ generate_documented_code(file_path, file_path)
23
+
24
+ # requirements.txt
25
+ requirements_path = os.path.join(repo_path, "requirements.txt")
26
+ generate_requirements_txt(repo_path, requirements_path)
27
+
28
+ # Create a temporary .zip for README/index
29
+ with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp_zip:
30
+ zip_path = tmp_zip.name
31
+ with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
32
+ for root, _, files in os.walk(repo_path):
33
+ for file in files:
34
+ full_path = os.path.join(root, file)
35
+ rel_path = os.path.relpath(full_path, repo_path)
36
+ zipf.write(full_path, rel_path)
37
+
38
+ # README + index.md
39
+ readme_path, index_path = generate_readme_from_zip(zip_path, temp_output_dir)
40
+
41
+ # Copy the processed repo
42
+ for item in os.listdir(repo_path):
43
+ s = os.path.join(repo_path, item)
44
+ d = os.path.join(temp_output_dir, item)
45
+ if os.path.isdir(s):
46
+ shutil.copytree(s, d, dirs_exist_ok=True)
47
+ else:
48
+ shutil.copy2(s, d)
49
+
50
+ dest_readme = os.path.join(temp_output_dir, "README.md")
51
+ dest_index = os.path.join(temp_output_dir, "index.md")
52
+
53
+ if os.path.abspath(readme_path) != os.path.abspath(dest_readme):
54
+ shutil.copy2(readme_path, dest_readme)
55
+ if os.path.abspath(index_path) != os.path.abspath(dest_index):
56
+ shutil.copy2(index_path, dest_index)
57
+
58
+ # Output zip file with consistent name
59
+ output_zip_path = os.path.join(
60
+ tempfile.gettempdir(), f"{zip_output_name}.zip"
61
+ )
 
62
  with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
63
+ for root, _, files in os.walk(temp_output_dir):
64
+ for file in files:
65
+ full_path = os.path.join(root, file)
66
+ arcname = os.path.relpath(full_path, temp_output_dir)
67
+ zipf.write(full_path, arcname)
68
+ global last_processed_repo_path
69
+ last_processed_repo_path = output_zip_path
70
  return output_zip_path
71
 
72
+ def process_zip_upload(uploaded_zip_file):
 
 
 
73
  zip_path = uploaded_zip_file.name
74
+ zip_name = os.path.splitext(os.path.basename(zip_path))[0] # e.g., my_project.zip → my_project
75
 
76
  with tempfile.TemporaryDirectory() as temp_input_dir:
77
+ input_zip_path = os.path.join(temp_input_dir, "input_repo.zip")
78
+ shutil.copy(zip_path, input_zip_path)
79
+ with zipfile.ZipFile(input_zip_path, "r") as zip_ref:
80
  zip_ref.extractall(temp_input_dir)
 
 
 
 
 
 
81
 
82
+ extracted_dirs = [d for d in os.listdir(temp_input_dir) if os.path.isdir(os.path.join(temp_input_dir, d))]
83
+ repo_root = os.path.join(temp_input_dir, extracted_dirs[0]) if extracted_dirs else temp_input_dir
84
+
85
+ return process_repo(repo_root, zip_name)
86
+
87
+ def process_github_clone(github_url):
88
  with tempfile.TemporaryDirectory() as clone_dir:
89
  try:
90
+ subprocess.check_call(["git", "clone", github_url, clone_dir])
91
+ return process_repo(clone_dir)
 
92
  except subprocess.CalledProcessError:
93
+ return "❌ Error cloning the GitHub repository. Please check the URL."
94
+
95
+ # Wrapper for process_zip_upload that also returns the path for the state
96
+ def process_zip_and_update_state(uploaded_zip_file):
97
+ zip_path = process_zip_upload(uploaded_zip_file)
98
+ return zip_path, zip_path # (output for gr.File, output for gr.State)
99
+
100
+ # Wrapper for process_github_clone as well
101
+ def process_git_and_update_state(github_url):
102
+ zip_path = process_github_clone(github_url)
103
+ return zip_path, zip_path
104
+
105
+ # Gradio user interface
106
+ with gr.Blocks() as demo:
107
+ gr.Markdown("# 🤖 AutoDocs – Smart Documentation Generator")
108
+ last_processed_repo_path_state = gr.State(value="")
109
+ with gr.Tab("📦 Upload .zip"):
110
+ zip_file_input = gr.File(label="Drop your repo .zip file here", file_types=['.zip'])
111
+ generate_btn_zip = gr.Button("📄 Generate from ZIP")
112
+ output_zip_zip = gr.File(label="⬇️ Download your documented repo")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
+ with gr.Tab("🌐 GitHub URL"):
115
+ github_url_input = gr.Text(label="Link to GitHub repository", placeholder="https://github.com/user/repo.git")
116
+ generate_btn_git = gr.Button("📄 Generate from GitHub")
117
+ output_zip_git = gr.File(label="⬇️ Download your documented repo")
118
+
119
+ with gr.Tab("🧠 Ask the agent about the repo"):
120
+ chatbot = gr.Chatbot()
121
+ user_input = gr.Textbox(placeholder="Ask your question here...")
122
+ send_btn = gr.Button("Send")
123
+
124
+ send_btn.click(
125
+ fn=ask_agent,
126
+ inputs=[chatbot, user_input, last_processed_repo_path_state],
127
+ outputs=[chatbot, user_input]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  )
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  generate_btn_zip.click(
131
+ fn=process_zip_and_update_state,
132
+ inputs=[zip_file_input],
133
+ outputs=[output_zip_zip, last_processed_repo_path_state]
 
 
 
 
 
 
134
  )
135
 
136
+ generate_btn_git.click(
137
+ fn=process_git_and_update_state,
138
+ inputs=[github_url_input],
139
+ outputs=[output_zip_git, last_processed_repo_path_state]
140
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  if __name__ == "__main__":
143
+ demo.queue()
144
+ demo.launch()
 
ask_agent.py CHANGED
@@ -1,128 +1,62 @@
1
  import os
2
  import tempfile
3
  import zipfile
4
- from typing import List, Tuple # Retain for Gradio history if not switching to 'messages' type
5
-
6
- from llm_interface import get_llm # Make sure this is imported
7
-
8
- def ask_agent(gradio_history: List[Tuple[str, str]],
9
- message: str,
10
- last_processed_repo_path: str,
11
- llm_provider: str = None,
12
- hf_endpoint: str = None,
13
- hf_api_key: str = None,
14
- google_api_key: str = None):
15
- """
16
- Handles a user's question about a processed repository using a conversational LLM.
17
-
18
- Args:
19
- gradio_history: The chat history from the Gradio chatbot component.
20
- message: The new message from the user.
21
- last_processed_repo_path: Path to the zip file of the last processed repo.
22
- llm_provider: The LLM provider chosen in the UI.
23
- hf_endpoint: The Hugging Face endpoint URL, if chosen.
24
- hf_api_key: The Hugging Face API key, if provided.
25
- google_api_key: The Google API key, if chosen.
26
-
27
- Returns:
28
- A tuple containing the updated Gradio history and an empty string for the textbox.
29
- """
30
- # Get LLM instance with current provider settings from UI/env
31
- llm = get_llm(provider=llm_provider,
32
- hf_endpoint=hf_endpoint,
33
- hf_api_key=hf_api_key,
34
- google_api_key=google_api_key)
35
-
36
- if not message or not message.strip():
37
- gradio_history.append((message, "Please enter a question."))
38
- return gradio_history, ""
39
 
 
 
 
 
40
 
 
41
 
42
  if not last_processed_repo_path or not os.path.exists(last_processed_repo_path):
43
- gradio_history.append((message, "📂 No repository has been processed yet. Please generate documentation for a repository first on the other tabs."))
44
- return gradio_history, ""
45
-
46
- if not zipfile.is_zipfile(last_processed_repo_path):
47
- gradio_history.append((message, f"❌ The stored path '{last_processed_repo_path}' is not a valid .zip file. Please re-process a repository."))
48
- return gradio_history, ""
49
 
50
-
51
- docs_and_code_content = ""
52
  with tempfile.TemporaryDirectory() as tmpdir:
53
- try:
54
- with zipfile.ZipFile(last_processed_repo_path, 'r') as zip_ref:
55
- zip_ref.extractall(tmpdir)
56
- except zipfile.BadZipFile:
57
- gradio_history.append((message, "❌ The processed repository file seems corrupted. Please re-process a repository."))
58
- return gradio_history, ""
59
- except Exception as e:
60
- gradio_history.append((message, f"❌ Error extracting the repository: {e}. Please re-process."))
61
- return gradio_history, ""
62
-
63
 
 
64
  extensions_docs = [".md", ".txt"]
65
- extensions_code = [".py", ".js", ".java", ".ts", ".cpp", ".c", ".cs", ".go", ".rb", ".swift", ".php", ".yml", ".yaml", ".json", ".xml", ".html", ".css", ".sh"]
66
 
67
  all_files = []
68
- extracted_items = os.listdir(tmpdir)
69
- repo_scan_root = tmpdir
70
- if len(extracted_items) == 1 and os.path.isdir(os.path.join(tmpdir, extracted_items[0])):
71
- repo_scan_root = os.path.join(tmpdir, extracted_items[0])
72
-
73
- for root, _, files in os.walk(repo_scan_root):
74
  for file in files:
75
- _, ext = os.path.splitext(file)
76
- if ext.lower() in extensions_docs or ext.lower() in extensions_code:
77
  all_files.append(os.path.join(root, file))
78
 
79
  if not all_files:
80
- gradio_history.append((message, "📄 No relevant documentation or code files were found in the processed repository. It might be empty or contain unsupported file types."))
81
- return gradio_history, ""
82
-
83
- MAX_CONTENT_CHARS = 30000
84
- current_chars = 0
85
-
86
-
87
 
 
 
88
  for file_path in all_files:
89
- if current_chars >= MAX_CONTENT_CHARS:
90
- docs_and_code_content += "\n\n===== [Content Truncated due to size limit] ====="
91
- break
92
  try:
93
- with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
94
- file_content = f.read(MAX_CONTENT_CHARS - current_chars)
95
- rel_path = os.path.relpath(file_path, repo_scan_root)
96
- content_to_add = f"\n\n===== File: {rel_path} =====\n\n{file_content}"
97
- docs_and_code_content += content_to_add
98
- current_chars += len(content_to_add)
99
  except Exception as e:
100
- error_msg = f"\n\n===== Error reading file {os.path.relpath(file_path, repo_scan_root)}: {e} =====\n\n"
101
- docs_and_code_content += error_msg
102
- current_chars += len(error_msg)
103
 
104
- if not docs_and_code_content.strip():
105
- gradio_history.append((message, "📄 Could not read content from any relevant files in the repository."))
106
- return gradio_history, ""
107
-
108
-
109
- current_turn_prompt = (
110
- f"You are a helpful AI assistant, an expert in understanding code and project structures. "
111
- f"Based ONLY on the following project content, answer the user's question. "
112
- f"If the answer cannot be found in the provided content, say so. Do not invent information.\n\n"
113
- f"--- Project Content ---\n{docs_and_code_content}\n--- End Project Content ---\n\n"
114
- f"User Question: {message}\n\n"
115
- f"Your Answer (be clear, concise, and stay strictly within the provided content):"
116
  )
117
-
118
- chat_session_obj = llm.start_chat_session(history=gradio_history)
119
- # Check if starting the session itself failed (e.g., due to API key issues reported by get_llm/LLMInterface stubs)
120
- if isinstance(chat_session_obj, str) and chat_session_obj.startswith("❌"):
121
- # The error message from start_chat_session (or the stub) is the response
122
- gradio_history.append((message, chat_session_obj))
123
- return gradio_history, ""
124
 
125
- answer = llm.send_chat_message(session=chat_session_obj, message=current_turn_prompt)
126
- gradio_history.append((message, answer))
 
 
 
 
 
 
127
 
128
- return gradio_history, ""
 
1
  import os
2
  import tempfile
3
  import zipfile
4
+ import google.generativeai as genai
5
+ from dotenv import load_dotenv
6
+ load_dotenv()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ API_KEY = os.getenv("GOOGLE_API_KEY")
9
+ genai.configure(api_key=API_KEY)
10
+ model = genai.GenerativeModel("models/gemini-2.0-flash")
11
+ chat_session = model.start_chat(history=[])
12
 
13
+ def ask_agent(history, message, last_processed_repo_path):
14
 
15
  if not last_processed_repo_path or not os.path.exists(last_processed_repo_path):
16
+ return history, "📂 No repository has been processed yet. Please generate documentation first."
 
 
 
 
 
17
 
 
 
18
  with tempfile.TemporaryDirectory() as tmpdir:
19
+ with zipfile.ZipFile(last_processed_repo_path, 'r') as zip_ref:
20
+ zip_ref.extractall(tmpdir)
 
 
 
 
 
 
 
 
21
 
22
+ # Extensions for docs and code to consider
23
  extensions_docs = [".md", ".txt"]
24
+ extensions_code = [".py", ".js", ".java", ".ts", ".cpp", ".c", ".cs", ".go", ".rb", ".swift", ".php"]
25
 
26
  all_files = []
27
+ for root, _, files in os.walk(tmpdir):
 
 
 
 
 
28
  for file in files:
29
+ ext = os.path.splitext(file)[1].lower()
30
+ if ext in extensions_docs or ext in extensions_code:
31
  all_files.append(os.path.join(root, file))
32
 
33
  if not all_files:
34
+ return history, "📄 No documentation or code files found in the generated zip."
 
 
 
 
 
 
35
 
36
+ # Read and concatenate content
37
+ docs_and_code_content = ""
38
  for file_path in all_files:
 
 
 
39
  try:
40
+ with open(file_path, "r", encoding="utf-8") as f:
41
+ file_content = f.read()
42
+ rel_path = os.path.relpath(file_path, tmpdir)
43
+ docs_and_code_content += f"\n\n===== File: {rel_path} =====\n\n"
44
+ docs_and_code_content += file_content
 
45
  except Exception as e:
46
+ docs_and_code_content += f"\n\n===== Error reading file {file_path}: {str(e)} =====\n\n"
 
 
47
 
48
+ prompt = (
49
+ f"Here is the content of the project (documentation and code):\n\n{docs_and_code_content}\n\n"
50
+ f"Question: {message}\n\nPlease respond clearly and precisely."
 
 
 
 
 
 
 
 
 
51
  )
 
 
 
 
 
 
 
52
 
53
+ try:
54
+ response = chat_session.send_message(prompt)
55
+ answer = response.text
56
+ except Exception as e:
57
+ answer = f"❌ Error when calling Gemini: {str(e)}"
58
+
59
+ history = history or []
60
+ history.append((message, answer))
61
 
62
+ return history, ""
doc_generator.py CHANGED
@@ -1,196 +1,141 @@
1
- import ast
2
-
3
-
4
- import importlib.util
5
  import os
 
 
6
  import sys
 
7
 
8
- from llm_interface import get_llm # Make sure this is imported
9
-
10
-
11
-
12
-
13
-
14
 
 
 
 
 
 
15
 
16
  PROMPT = """You are an expert programming assistant.
17
- For the following Python code, perform the following actions:
18
- - The code structure (classes, functions, arguments, order) must remain exactly the same.
19
- - Add clear comments for each important step or complex logic.
20
- - Add Google-style docstrings for all classes and functions. Docstrings should explain the purpose, arguments (with types if possible from context), and what the function/method returns (if anything, with type).
21
- - Add type annotations for function arguments and return types. If a type is complex or unknown, use `typing.Any` or a descriptive placeholder if appropriate.
22
-
23
- Respond ONLY with the updated Python code. Do not add any explanatory text before or after the code block.
24
- Ensure the output is a single, valid Python code block.
25
-
26
-
27
  Here is the code:
28
- ```python
29
  {code}
30
  """
31
- def generate_documented_code(input_path: str, output_path: str,
32
- llm_provider: str = None, hf_endpoint: str = None,
33
- hf_api_key: str = None, google_api_key: str = None) -> str:
34
  """
35
- Generates documented code for a given file using the configured LLM.
 
36
  Args:
37
- input_path: Path to the Python file to document.
38
- output_path: Path where the documented Python file will be saved.
39
- llm_provider: The LLM provider chosen in the UI (e.g., "GEMINI", "HUGGINGFACE").
40
- hf_endpoint: The Hugging Face endpoint URL, if chosen.
41
- hf_api_key: The Hugging Face API key, if provided.
42
- google_api_key: The Google API key, if chosen.
43
-
44
-
45
  Returns:
46
- The content of the documented code, or the original code/error message on failure.
47
  """
48
- # Get LLM instance with current provider settings from UI/env
49
- llm = get_llm(provider=llm_provider,
50
- hf_endpoint=hf_endpoint,
51
- hf_api_key=hf_api_key,
52
- google_api_key=google_api_key)
53
 
54
- try:
55
- with open(input_path, "r", encoding="utf-8") as f:
56
- original_code = f.read()
57
- except Exception as e:
58
- print(f"Error reading input file {input_path}: {e}")
59
- error_content = f"# Error reading input file: {e}\n"
60
- # Still write to output_path so the file exists for zipping, even if it's an error message
61
- with open(output_path, "w", encoding="utf-8") as output_file:
62
- output_file.write(error_content)
63
- return error_content
64
-
65
-
66
- if not original_code.strip():
67
- with open(output_path, "w", encoding="utf-8") as output_file:
68
- output_file.write("") # Write empty if original is empty
69
- return ""
70
-
71
- formatted_prompt = PROMPT.format(code=original_code)
72
- updated_code = llm.generate_content(formatted_prompt) # Use the llm instance
73
-
74
- # Check if LLM returned an error message or empty content
75
- # The llm.generate_content itself should return "❌ ..." on failure
76
- if updated_code.startswith("❌") or not updated_code.strip():
77
- print(f"LLM failed to generate documented code for {input_path}. Using original code. LLM Output: {updated_code}")
78
- # Fallback: write original code to output path if LLM fails significantly
79
- with open(output_path, "w", encoding="utf-8") as output_file:
80
- output_file.write(original_code)
81
- # Return original code so the rest of the process can continue with undoc'd code
82
- return original_code
83
 
84
  with open(output_path, "w", encoding="utf-8") as output_file:
85
  output_file.write(updated_code)
86
 
87
  return updated_code
88
 
89
- def extract_imports_from_file(file_path: str) -> set:
90
-
91
 
 
92
  """
93
- Extracts imported module names from a Python file.
 
94
  Args:
95
- file_path: The path to the Python file.
96
 
97
  Returns:
98
- A set of top-level imported module names.
99
  """
100
- imports = set()
101
  try:
102
- with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
103
- source_code = f.read()
104
- if not source_code.strip():
105
- return imports
106
- try:
107
- tree = ast.parse(source_code)
108
- except SyntaxError:
109
- return imports
110
- except OSError:
111
- return imports
112
-
113
-
114
 
 
115
  for node in ast.walk(tree):
116
  if isinstance(node, ast.Import):
117
  for alias in node.names:
 
 
 
118
  imports.add(node.module.split('.')[0])
119
  return imports
120
 
121
- def is_std_lib(module_name: str) -> bool:
 
122
  """
123
- Checks if a module name is part of the Python standard library.
 
124
  Args:
125
- module_name: The name of the module.
126
 
127
  Returns:
128
- True if the module is standard library, False otherwise.
129
  """
130
- if not module_name:
131
- return False
132
  if module_name in sys.builtin_module_names:
133
  return True
134
- try:
135
- spec = importlib.util.find_spec(module_name)
136
- if spec and spec.origin:
137
- origin_lower = spec.origin.lower()
138
- # More robust check might be needed for all edge cases (e.g. frozen modules)
139
- # but this covers common scenarios.
140
- return "site-packages" not in origin_lower and "dist-packages" not in origin_lower
141
- return False
142
- except (ModuleNotFoundError, ImportError, AttributeError):
143
- return False
144
-
145
- def generate_requirements_txt(base_path: str, output_path: str):
146
  """
147
- Generates a requirements.txt file by scanning Python files in a directory
148
- for external (non-standard library, non-local) imports.
149
- This function does NOT use the LLM.
150
 
151
  Args:
152
- base_path: The root directory of the repository to scan.
153
- output_path: The path where requirements.txt will be saved.
154
  """
155
  all_imports = set()
156
  local_modules = set()
157
 
158
- ignore_dirs_set = {
159
- '.git', '__pycache__', 'node_modules', 'venv', '.venv', 'env',
160
- '.vscode', '.idea', 'build', 'dist', 'docs', 'tests', 'test',
161
- 'examples', 'example', 'data', 'static', 'templates', 'assets', 'img', 'images', 'logs',
162
- 'migrations', 'coverage'
163
- }
164
-
165
- for root, dirs, files in os.walk(base_path, topdown=True):
166
- dirs[:] = [d for d in dirs if d not in ignore_dirs_set and not d.startswith('.')]
167
-
168
  for file in files:
169
  if file.endswith(".py"):
170
  module_name = os.path.splitext(file)[0]
171
  local_modules.add(module_name)
172
- if file == "__init__.py":
173
- package_name = os.path.basename(root)
174
- if package_name and package_name not in ignore_dirs_set and not package_name.startswith('.'):
175
- local_modules.add(package_name)
176
 
177
- for root, dirs, files in os.walk(base_path, topdown=True):
178
- dirs[:] = [d for d in dirs if d not in ignore_dirs_set and not d.startswith('.')]
179
  for file in files:
180
  if file.endswith(".py"):
181
  file_path = os.path.join(root, file)
182
  all_imports.update(extract_imports_from_file(file_path))
183
 
184
- external_imports = sorted(list(set(
185
-
186
  imp for imp in all_imports
187
- if imp and imp not in local_modules and not is_std_lib(imp)
188
- )))
189
-
190
 
 
191
  with open(output_path, "w", encoding="utf-8") as f:
192
- if not external_imports:
193
- f.write("# No external Python dependencies found (or all are standard libraries/local modules).\n")
194
- else:
195
- for package in external_imports:
196
- f.write(f"{package.lower()}\n")
 
1
+ import google.generativeai as genai
2
+ import re
 
 
3
  import os
4
+ import ast
5
+ from dotenv import load_dotenv
6
  import sys
7
+ import importlib.util
8
 
9
+ load_dotenv()
 
 
 
 
 
10
 
11
+ API_KEY = os.getenv("GOOGLE_API_KEY")
12
+ if API_KEY is None:
13
+ raise ValueError("⚠️ The API key MY_API_KEY is missing! Check the Secrets in Hugging Face.")
14
+ genai.configure(api_key=API_KEY)
15
+ model = genai.GenerativeModel("models/gemini-2.0-flash")
16
 
17
  PROMPT = """You are an expert programming assistant.
18
+ For the following code, perform the following actions:
19
+ - The code must remain exactly the same
20
+ - Add clear comments for each important step.
21
+ - Rename variables if it makes the code easier to understand.
22
+ - Add type annotations if the language supports it.
23
+ - For each function, add a Google-style docstring (or equivalent format depending on the language).
24
+
25
+ Respond only with the updated code, no explanation.
 
 
26
  Here is the code:
27
+
28
  {code}
29
  """
30
+
31
+ def generate_documented_code(input_path: str, output_path: str) -> str:
 
32
  """
33
+ Generate a documented version of the code from the given input file and save it to the output file.
34
+
35
  Args:
36
+ input_path (str): Path to the original code file.
37
+ output_path (str): Path where the documented code will be saved.
38
+
 
 
 
 
 
39
  Returns:
40
+ str: The updated and documented code.
41
  """
42
+ with open(input_path, "r", encoding="utf-8") as f:
43
+ original_code = f.read()
 
 
 
44
 
45
+ prompt = PROMPT.format(code=original_code)
46
+ response = model.generate_content(prompt)
47
+ updated_code = response.text.strip()
48
+
49
+ # Clean up Markdown blocks if present
50
+ lines = updated_code.splitlines()
51
+ if len(lines) > 2:
52
+ lines = lines[1:-1] # remove the first and last lines
53
+ updated_code = "\n".join(lines)
54
+ else:
55
+ # if less than 3 lines, clear everything or keep as is depending on needs
56
+ updated_code = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  with open(output_path, "w", encoding="utf-8") as output_file:
59
  output_file.write(updated_code)
60
 
61
  return updated_code
62
 
 
 
63
 
64
+ def extract_imports_from_file(file_path):
65
  """
66
+ Extract imported modules from a Python file to generate requirements.txt.
67
+
68
  Args:
69
+ file_path (str): Path to the Python file.
70
 
71
  Returns:
72
+ set: A set of imported module names.
73
  """
 
74
  try:
75
+ with open(file_path, "r", encoding="utf-8") as f:
76
+ tree = ast.parse(f.read())
77
+ except SyntaxError:
78
+ return set()
 
 
 
 
 
 
 
 
79
 
80
+ imports = set()
81
  for node in ast.walk(tree):
82
  if isinstance(node, ast.Import):
83
  for alias in node.names:
84
+ imports.add(alias.name.split('.')[0])
85
+ elif isinstance(node, ast.ImportFrom):
86
+ if node.module and not node.module.startswith("."):
87
  imports.add(node.module.split('.')[0])
88
  return imports
89
 
90
+
91
+ def is_std_lib(module_name):
92
  """
93
+ Check if a module is part of the Python standard library.
94
+
95
  Args:
96
+ module_name (str): The name of the module.
97
 
98
  Returns:
99
+ bool: True if the module is part of the standard library, False otherwise.
100
  """
 
 
101
  if module_name in sys.builtin_module_names:
102
  return True
103
+ spec = importlib.util.find_spec(module_name)
104
+ return spec is not None and "site-packages" not in (spec.origin or "")
105
+
106
+
107
+ def generate_requirements_txt(base_path, output_path):
 
 
 
 
 
 
 
108
  """
109
+ Generate a requirements.txt file based on external imports found in Python files.
 
 
110
 
111
  Args:
112
+ base_path (str): Root directory of the codebase.
113
+ output_path (str): Path to save the generated requirements.txt file.
114
  """
115
  all_imports = set()
116
  local_modules = set()
117
 
118
+ # Get names of internal modules (i.e., .py files in the repo)
119
+ for root, _, files in os.walk(base_path):
 
 
 
 
 
 
 
 
120
  for file in files:
121
  if file.endswith(".py"):
122
  module_name = os.path.splitext(file)[0]
123
  local_modules.add(module_name)
 
 
 
 
124
 
125
+ # Extract all imports used in the project
126
+ for root, _, files in os.walk(base_path):
127
  for file in files:
128
  if file.endswith(".py"):
129
  file_path = os.path.join(root, file)
130
  all_imports.update(extract_imports_from_file(file_path))
131
 
132
+ # Remove internal modules and standard library modules
133
+ external_imports = sorted([
134
  imp for imp in all_imports
135
+ if imp not in local_modules and not is_std_lib(imp)
136
+ ])
 
137
 
138
+ # Write the requirements.txt file
139
  with open(output_path, "w", encoding="utf-8") as f:
140
+ for package in external_imports:
141
+ f.write(f"{package}\n")
 
 
 
readme_generator.py CHANGED
@@ -64,13 +64,13 @@ def generate_readme_from_zip(zip_file_path: str, output_dir: str) -> (str, str):
64
  readme_path = os.path.join(output_dir, "README.md")
65
  index_path = os.path.join(output_dir, "index.md")
66
  os.makedirs(output_dir, exist_ok=True)
67
- # Clean markdown code blocks if they exist
68
  lines = readme_content.splitlines()
69
  if len(lines) > 2:
70
- lines = lines[1:-1] # remove the first and last lines
71
  readme_content = "\n".join(lines)
72
  else:
73
- # if less than 3 lines, empty or keep as needed
74
  readme_content = ""
75
 
76
  with open(readme_path, "w", encoding="utf-8") as f:
@@ -79,6 +79,7 @@ def generate_readme_from_zip(zip_file_path: str, output_dir: str) -> (str, str):
79
  # ✅ Generate index from tempdir (correct location of extracted files)
80
  write_index_file(tempdir, index_path)
81
 
 
82
  return readme_path, index_path
83
 
84
  def generate_tree_structure(path: str, prefix: str = "") -> str:
@@ -100,7 +101,7 @@ def generate_tree_structure(path: str, prefix: str = "") -> str:
100
  lines.extend(subtree.splitlines()[1:]) # skip repeated dir name
101
 
102
  lines.extend(["├── README.md",
103
- "└── index.md"])
104
 
105
  return "\n".join(lines)
106
 
@@ -109,3 +110,4 @@ def write_index_file(project_path: str, output_path: str):
109
  structure = generate_tree_structure(project_path)
110
  with open(output_path, "w", encoding="utf-8") as f:
111
  f.write(structure)
 
 
64
  readme_path = os.path.join(output_dir, "README.md")
65
  index_path = os.path.join(output_dir, "index.md")
66
  os.makedirs(output_dir, exist_ok=True)
67
+ # Nettoyer les blocs Markdown s'ils existent
68
  lines = readme_content.splitlines()
69
  if len(lines) > 2:
70
+ lines = lines[1:-1] # enlève la première et la dernière ligne
71
  readme_content = "\n".join(lines)
72
  else:
73
+ # si moins de 3 lignes, on vide tout ou on garde tel quel selon le besoin
74
  readme_content = ""
75
 
76
  with open(readme_path, "w", encoding="utf-8") as f:
 
79
  # ✅ Generate index from tempdir (correct location of extracted files)
80
  write_index_file(tempdir, index_path)
81
 
82
+
83
  return readme_path, index_path
84
 
85
  def generate_tree_structure(path: str, prefix: str = "") -> str:
 
101
  lines.extend(subtree.splitlines()[1:]) # skip repeated dir name
102
 
103
  lines.extend(["├── README.md",
104
+ "└── index.md"])
105
 
106
  return "\n".join(lines)
107
 
 
110
  structure = generate_tree_structure(project_path)
111
  with open(output_path, "w", encoding="utf-8") as f:
112
  f.write(structure)
113
+