The0eau committed on
Commit
43f6bd8
·
1 Parent(s): d10890a
Files changed (4) hide show
  1. app.py +293 -112
  2. ask_agent.py +103 -37
  3. doc_generator.py +135 -80
  4. index.md +10 -6
app.py CHANGED
@@ -1,144 +1,325 @@
1
  import gradio as gr
2
  import os
3
  import shutil
 
4
  import tempfile
5
  import zipfile
6
- import subprocess
7
  import uuid
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- from ask_agent import ask_agent
10
- from doc_generator import generate_documented_code, generate_requirements_txt
11
- from readme_generator import generate_readme_from_zip
12
 
13
- last_processed_repo_path = ""
14
 
15
- def process_repo(repo_path, zip_output_name="AutoDocs"):
16
  with tempfile.TemporaryDirectory() as temp_output_dir:
17
- # Document .py files
18
- for root, _, files in os.walk(repo_path):
 
 
19
  for file in files:
20
  if file.endswith(".py"):
21
  file_path = os.path.join(root, file)
22
- generate_documented_code(file_path, file_path)
23
-
24
- # requirements.txt
25
- requirements_path = os.path.join(repo_path, "requirements.txt")
26
- generate_requirements_txt(repo_path, requirements_path)
27
-
28
- # Create a temporary .zip for README/index
29
- with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp_zip:
30
- zip_path = tmp_zip.name
31
- with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
32
- for root, _, files in os.walk(repo_path):
33
- for file in files:
34
- full_path = os.path.join(root, file)
35
- rel_path = os.path.relpath(full_path, repo_path)
36
- zipf.write(full_path, rel_path)
37
-
38
- # README + index.md
39
- readme_path, index_path = generate_readme_from_zip(zip_path, temp_output_dir)
40
-
41
- # Copy the processed repo
42
- for item in os.listdir(repo_path):
43
- s = os.path.join(repo_path, item)
44
- d = os.path.join(temp_output_dir, item)
45
- if os.path.isdir(s):
46
- shutil.copytree(s, d, dirs_exist_ok=True)
47
- else:
48
- shutil.copy2(s, d)
49
-
50
- dest_readme = os.path.join(temp_output_dir, "README.md")
51
- dest_index = os.path.join(temp_output_dir, "index.md")
52
-
53
- if os.path.abspath(readme_path) != os.path.abspath(dest_readme):
54
- shutil.copy2(readme_path, dest_readme)
55
- if os.path.abspath(index_path) != os.path.abspath(dest_index):
56
- shutil.copy2(index_path, dest_index)
57
-
58
- # Output zip file with consistent name
59
- output_zip_path = os.path.join(
60
- tempfile.gettempdir(), f"{zip_output_name}.zip"
61
- )
 
62
  with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
63
- for root, _, files in os.walk(temp_output_dir):
64
- for file in files:
65
- full_path = os.path.join(root, file)
66
- arcname = os.path.relpath(full_path, temp_output_dir)
67
- zipf.write(full_path, arcname)
68
- global last_processed_repo_path
69
- last_processed_repo_path = output_zip_path
70
  return output_zip_path
71
 
72
- def process_zip_upload(uploaded_zip_file):
 
 
 
73
  zip_path = uploaded_zip_file.name
74
- zip_name = os.path.splitext(os.path.basename(zip_path))[0] # e.g., my_project.zip β†’ my_project
75
 
76
  with tempfile.TemporaryDirectory() as temp_input_dir:
77
- input_zip_path = os.path.join(temp_input_dir, "input_repo.zip")
78
- shutil.copy(zip_path, input_zip_path)
79
- with zipfile.ZipFile(input_zip_path, "r") as zip_ref:
80
- zip_ref.extractall(temp_input_dir)
81
-
82
- extracted_dirs = [d for d in os.listdir(temp_input_dir) if os.path.isdir(os.path.join(temp_input_dir, d))]
83
- repo_root = os.path.join(temp_input_dir, extracted_dirs[0]) if extracted_dirs else temp_input_dir
84
 
85
- return process_repo(repo_root, zip_name)
 
 
 
 
 
 
86
 
87
- def process_github_clone(github_url):
 
 
 
 
 
88
  with tempfile.TemporaryDirectory() as clone_dir:
89
  try:
90
- subprocess.check_call(["git", "clone", github_url, clone_dir])
91
- return process_repo(clone_dir)
 
92
  except subprocess.CalledProcessError:
93
- return "❌ Error cloning the GitHub repository. Please check the URL."
94
-
95
- # Wrapper for process_zip_upload that also returns the path for the state
96
- def process_zip_and_update_state(uploaded_zip_file):
97
- zip_path = process_zip_upload(uploaded_zip_file)
98
- return zip_path, zip_path # (output for gr.File, output for gr.State)
99
-
100
- # Wrapper for process_github_clone as well
101
- def process_git_and_update_state(github_url):
102
- zip_path = process_github_clone(github_url)
103
- return zip_path, zip_path
104
-
105
- # Gradio user interface
106
- with gr.Blocks() as demo:
107
- gr.Markdown("# πŸ€– AutoDocs – Smart Documentation Generator")
108
- last_processed_repo_path_state = gr.State(value="")
109
- with gr.Tab("πŸ“¦ Upload .zip"):
110
- zip_file_input = gr.File(label="Drop your repo .zip file here", file_types=['.zip'])
111
- generate_btn_zip = gr.Button("πŸ“„ Generate from ZIP")
112
- output_zip_zip = gr.File(label="⬇️ Download your documented repo")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
- with gr.Tab("🌐 GitHub URL"):
115
- github_url_input = gr.Text(label="Link to GitHub repository", placeholder="https://github.com/user/repo.git")
116
- generate_btn_git = gr.Button("πŸ“„ Generate from GitHub")
117
- output_zip_git = gr.File(label="⬇️ Download your documented repo")
118
-
119
- with gr.Tab("🧠 Ask the agent about the repo"):
120
- chatbot = gr.Chatbot()
121
- user_input = gr.Textbox(placeholder="Ask your question here...")
122
- send_btn = gr.Button("Send")
123
-
124
- send_btn.click(
125
- fn=ask_agent,
126
- inputs=[chatbot, user_input, last_processed_repo_path_state],
127
- outputs=[chatbot, user_input]
 
 
 
 
 
 
 
 
 
128
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
- generate_btn_zip.click(
131
- fn=process_zip_and_update_state,
132
- inputs=[zip_file_input],
133
- outputs=[output_zip_zip, last_processed_repo_path_state]
134
- )
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  generate_btn_git.click(
137
- fn=process_git_and_update_state,
138
- inputs=[github_url_input],
139
- outputs=[output_zip_git, last_processed_repo_path_state]
 
 
 
 
 
 
140
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  if __name__ == "__main__":
143
- demo.queue()
144
- demo.launch()
 
 
1
  import gradio as gr
2
  import os
3
  import shutil
4
+ import subprocess
5
  import tempfile
6
  import zipfile
7
+
8
  import uuid
9
+ from functools import partial
10
+
11
+ # Import get_llm, but other modules will call it with current provider state
12
+ from llm_interface import get_llm
13
+ from ask_agent import ask_agent # ask_agent will call get_llm()
14
+ from doc_generator import generate_documented_code, generate_requirements_txt # these too
15
+ from readme_generator import generate_readme_from_zip # and this
16
+
17
+ # Helper to get current LLM based on UI state
18
+ # This is NOT how get_llm should be used directly by the modules.
19
+ # Instead, the modules call get_llm() which now can take UI selected provider.
20
+ # The `current_llm_provider_state` and `hf_endpoint_state` will be passed to `get_llm()`
21
+ # from the functions that are directly invoked by Gradio events.
22
+
23
+ def process_repo(repo_path, zip_output_name="AutoDocs",
24
+ llm_provider_ui: str = None, hf_endpoint_ui: str = None,
25
+ google_api_key_ui: str = None, hf_api_key_ui: str = None): # Pass UI choices
26
+ """
27
+ Processes a repository. Now calls get_llm with UI selected provider.
28
+ """
29
+ # Note: generate_documented_code, etc., will call get_llm() internally.
30
+ # We need to ensure get_llm() can pick up these UI-set values.
31
+ # This requires a change in how get_llm() is called or how state is managed globally.
32
+ # For simplicity here, we're assuming the modules (doc_generator, etc.) will call
33
+ # get_llm() and it will use the latest state (which is tricky with just env vars).
34
+ # A better approach: pass the llm_instance to these functions.
35
+ # OR: Make get_llm() aware of Gradio state (not ideal).
36
+
37
+ # Let's make the processing functions accept the llm_provider and hf_endpoint
38
+ # and they will pass it to get_llm when they need an LLM instance.
39
+
40
+
41
+
42
+
43
+
44
+
45
+
46
 
 
 
 
47
 
 
48
 
 
49
  with tempfile.TemporaryDirectory() as temp_output_dir:
50
+ processed_repo_path = os.path.join(temp_output_dir, "processed_repo")
51
+ shutil.copytree(repo_path, processed_repo_path)
52
+
53
+ for root, _, files in os.walk(processed_repo_path):
54
  for file in files:
55
  if file.endswith(".py"):
56
  file_path = os.path.join(root, file)
57
+ # Pass provider info to functions that use LLM
58
+ generate_documented_code(file_path, file_path,
59
+ llm_provider=llm_provider_ui,
60
+ hf_endpoint=hf_endpoint_ui,
61
+ hf_api_key=hf_api_key_ui,
62
+ google_api_key=google_api_key_ui)
63
+
64
+
65
+ requirements_path = os.path.join(processed_repo_path, "requirements.txt")
66
+ generate_requirements_txt(processed_repo_path, requirements_path) # Does not use LLM
67
+
68
+ with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp_zip_for_readme:
69
+ zip_path_for_readme = tmp_zip_for_readme.name
70
+ with zipfile.ZipFile(zip_path_for_readme, "w", zipfile.ZIP_DEFLATED) as zipf:
71
+ for r, _, fs in os.walk(processed_repo_path):
72
+ for f_item in fs:
73
+ full_p = os.path.join(r, f_item)
74
+ arcname = os.path.relpath(full_p, processed_repo_path)
75
+ zipf.write(full_p, arcname)
76
+
77
+ with tempfile.TemporaryDirectory() as readme_out_dir:
78
+ # Pass provider info
79
+ readme_path, index_path = generate_readme_from_zip(
80
+ zip_path_for_readme, readme_out_dir,
81
+ llm_provider=llm_provider_ui,
82
+ hf_endpoint=hf_endpoint_ui,
83
+ hf_api_key=hf_api_key_ui,
84
+ google_api_key=google_api_key_ui
85
+ )
86
+ shutil.copy2(readme_path, os.path.join(processed_repo_path, "README.md"))
87
+ shutil.copy2(index_path, os.path.join(processed_repo_path, "index.md"))
88
+
89
+ os.remove(zip_path_for_readme)
90
+
91
+ output_zip_filename = f"{zip_output_name}_{uuid.uuid4().hex[:8]}.zip"
92
+ output_zip_path = os.path.join(tempfile.gettempdir(), output_zip_filename)
93
+
94
+
95
+
96
+
97
+
98
  with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
99
+ for r, _, fs in os.walk(processed_repo_path):
100
+ for f_item in fs:
101
+ full_p = os.path.join(r, f_item)
102
+ arcname = os.path.relpath(full_p, processed_repo_path)
103
+ zipf.write(full_p, arcname)
104
+
105
+
106
  return output_zip_path
107
 
108
+ # --- Core processing functions now accept LLM provider details ---
109
+ def actual_process_zip_upload(uploaded_zip_file, progress_tracker,
110
+ llm_provider_ui, hf_endpoint_ui, google_api_key_ui, hf_api_key_ui):
111
+ progress_tracker(0, desc="Starting upload processing...")
112
  zip_path = uploaded_zip_file.name
113
+ zip_name = os.path.splitext(os.path.basename(zip_path))[0]
114
 
115
  with tempfile.TemporaryDirectory() as temp_input_dir:
116
+ progress_tracker(0.1, desc="Unzipping repository...")
117
+ with zipfile.ZipFile(zip_path, "r") as zip_ref:
 
 
 
 
 
118
 
119
+ zip_ref.extractall(temp_input_dir)
120
+ extracted_items = os.listdir(temp_input_dir)
121
+ repo_root = temp_input_dir
122
+ if len(extracted_items) == 1 and os.path.isdir(os.path.join(temp_input_dir, extracted_items[0])):
123
+ repo_root = os.path.join(temp_input_dir, extracted_items[0])
124
+ progress_tracker(0.3, desc="Generating documentation...")
125
+ return process_repo(repo_root, zip_name, llm_provider_ui, hf_endpoint_ui, google_api_key_ui, hf_api_key_ui)
126
 
127
+ def actual_process_github_clone(github_url, progress_tracker,
128
+ llm_provider_ui, hf_endpoint_ui, google_api_key_ui, hf_api_key_ui):
129
+ if not github_url or not (github_url.startswith("https://") or github_url.startswith("http://")):
130
+ return "❌ Please provide a valid GitHub URL.", ""
131
+ progress_tracker(0, desc="Cloning GitHub repository...")
132
+ repo_name_from_url = github_url.split("/")[-1].replace(".git", "")
133
  with tempfile.TemporaryDirectory() as clone_dir:
134
  try:
135
+ subprocess.check_call(["git", "clone", "--depth", "1", github_url, clone_dir])
136
+ progress_tracker(0.3, desc="Generating documentation...")
137
+ return process_repo(clone_dir, repo_name_from_url, llm_provider_ui, hf_endpoint_ui, google_api_key_ui, hf_api_key_ui)
138
  except subprocess.CalledProcessError:
139
+ return "❌ Error cloning the GitHub repository. Please check the URL.", ""
140
+ except Exception as e:
141
+ return f"❌ An unexpected error occurred: {e}", ""
142
+
143
+ # --- Gradio UI event handler wrapper ---
144
+ def process_and_update_state_handler(
145
+ specific_processing_function, # e.g. actual_process_github_clone
146
+ data_input, # e.g. github_url
147
+ # LLM config from UI state:
148
+ llm_provider_state: str,
149
+ hf_endpoint_state: str,
150
+ google_api_key_state: str,
151
+ hf_api_key_state: str, # Added this
152
+ progress=gr.Progress(track_tqdm=True)
153
+ ):
154
+ # Validate required fields based on provider
155
+ if llm_provider_state == "GEMINI" and not google_api_key_state:
156
+ return "❌ Google API Key is required for Gemini. Please set it in the LLM Configuration tab or .env file.", None
157
+ if llm_provider_state == "HUGGINGFACE" and not hf_endpoint_state:
158
+ return "❌ Hugging Face Model Endpoint is required. Please set it in the LLM Configuration tab or .env file.", None
159
+
160
+ # Call the actual processing function with all necessary args
161
+ result = specific_processing_function(
162
+ data_input, progress,
163
+ llm_provider_state, hf_endpoint_state,
164
+ google_api_key_state, hf_api_key_state
165
+ )
166
+
167
+ if isinstance(result, tuple) and "❌" in result[0]: return result[0], None
168
+ elif isinstance(result, str) and "❌" in result: return result, None
169
+ elif isinstance(result, str) and os.path.exists(result): return result, result
170
+ else: return f"❌ Unexpected result from processing: {result}", None
171
+
172
+ # --- Gradio UI ---
173
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
174
+ gr.Markdown("# πŸ€– AutoDocs – Intelligent Documentation Generator")
175
+
176
+
177
+
178
+
179
+
180
 
181
+ # --- State objects for LLM config ---
182
+ # Initialize from .env or defaults
183
+ default_provider = os.getenv("LLM_PROVIDER", "GEMINI").upper()
184
+ default_hf_endpoint = os.getenv("HF_MODEL_ENDPOINT", "")
185
+ default_google_api_key = os.getenv("GOOGLE_API_KEY", "")
186
+ default_hf_api_key = os.getenv("HF_API_KEY", "")
187
+
188
+
189
+ current_llm_provider_state = gr.State(default_provider)
190
+ current_hf_endpoint_state = gr.State(default_hf_endpoint)
191
+ current_google_api_key_state = gr.State(default_google_api_key)
192
+ current_hf_api_key_state = gr.State(default_hf_api_key) # For user to input if needed for HF
193
+
194
+ last_processed_repo_path_state = gr.State(None)
195
+
196
+ # --- LLM Configuration Tab ---
197
+ with gr.Tab("βš™οΈ LLM Configuration"):
198
+ gr.Markdown("Configure your preferred Language Model provider. Settings here override `.env` file values for the current session.")
199
+
200
+ selected_provider_radio = gr.Radio(
201
+ ["GEMINI", "HUGGINGFACE"],
202
+ label="Select LLM Provider",
203
+ value=default_provider
204
  )
205
+
206
+ # Gemini specific inputs
207
+ with gr.Group(visible=(default_provider == "GEMINI")) as gemini_config_group:
208
+ gr.Markdown("### Gemini Configuration")
209
+ google_api_key_input = gr.Textbox(
210
+ label="Google API Key",
211
+ placeholder="Enter your Google API Key (starts with 'AIzaSy...')",
212
+ value=default_google_api_key,
213
+ type="password"
214
+ )
215
+
216
+ # Hugging Face specific inputs
217
+ with gr.Group(visible=(default_provider == "HUGGINGFACE")) as hf_config_group:
218
+ gr.Markdown("### Hugging Face Configuration")
219
+ hf_endpoint_input = gr.Textbox(
220
+ label="Hugging Face Model Endpoint URL",
221
+ placeholder="e.g., https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2",
222
+ value=default_hf_endpoint
223
+ )
224
+ hf_api_key_input = gr.Textbox( # Added HF API Key input
225
+ label="Hugging Face API Key (Optional)",
226
+ placeholder="Enter your Hugging Face API Key (starts with 'hf_') if needed",
227
+ value=default_hf_api_key,
228
+ type="password"
229
+ )
230
+
231
+ # Update visibility of config groups based on radio selection
232
+ def toggle_config_visibility(provider_choice):
233
+ is_gemini = provider_choice == "GEMINI"
234
+ is_hf = provider_choice == "HUGGINGFACE"
235
+ return {
236
+ gemini_config_group: gr.update(visible=is_gemini),
237
+ hf_config_group: gr.update(visible=is_hf),
238
+ # Update state variables
239
+ current_llm_provider_state: provider_choice
240
+ }
241
+
242
+ selected_provider_radio.change(
243
+ fn=toggle_config_visibility,
244
+ inputs=[selected_provider_radio],
245
+ outputs=[gemini_config_group, hf_config_group, current_llm_provider_state]
246
+ )
247
+
248
+ # Update state when text inputs change
249
+ hf_endpoint_input.change(lambda x: x, inputs=[hf_endpoint_input], outputs=[current_hf_endpoint_state])
250
+ google_api_key_input.change(lambda x: x, inputs=[google_api_key_input], outputs=[current_google_api_key_state])
251
+ hf_api_key_input.change(lambda x: x, inputs=[hf_api_key_input], outputs=[current_hf_api_key_state])
252
 
 
 
 
 
 
253
 
254
+ # --- Processing Tabs (GitHub, ZIP) ---
255
+ with gr.Tab("🌐 Process from GitHub URL"):
256
+ github_url_input = gr.Text(label="GitHub Repository URL", placeholder="https://github.com/gradio-app/gradio")
257
+ generate_btn_git = gr.Button("πŸ“„ Generate from GitHub", variant="primary")
258
+ output_zip_git = gr.File(label="⬇️ Download Your Documented Repo (.zip)")
259
+
260
+ with gr.Tab("πŸ“¦ Process from .zip upload"):
261
+ zip_file_input = gr.File(label="Upload a .zip file of your repository", file_types=['.zip'])
262
+ generate_btn_zip = gr.Button("πŸ“„ Generate from ZIP", variant="primary")
263
+ output_zip_zip = gr.File(label="⬇️ Download Your Documented Repo (.zip)")
264
+
265
+ # --- Chat Tab ---
266
+ with gr.Tab("🧠 Ask a Question about the Repo"):
267
+ with gr.Column():
268
+ gr.Markdown("Once you've processed a repository, you can ask questions about its content here. Uses the LLM configured in 'LLM Configuration' tab.")
269
+ chatbot = gr.Chatbot(label="Agent Chat", height=500)
270
+ user_input_tb = gr.Textbox(placeholder="e.g., 'What does the main function in app.py do?'", show_label=False, container=False)
271
+ send_btn = gr.Button("βœ‰οΈ Send")
272
+
273
+ # --- Click Handlers ---
274
+ # Now pass all relevant state variables to the handler
275
  generate_btn_git.click(
276
+ fn=partial(process_and_update_state_handler, actual_process_github_clone),
277
+ inputs=[
278
+ github_url_input,
279
+ current_llm_provider_state,
280
+ current_hf_endpoint_state,
281
+ current_google_api_key_state,
282
+ current_hf_api_key_state
283
+ ],
284
+ outputs=[output_zip_git, last_processed_repo_path_state],
285
  )
286
+
287
+ generate_btn_zip.click(
288
+ fn=partial(process_and_update_state_handler, actual_process_zip_upload),
289
+ inputs=[
290
+ zip_file_input,
291
+ current_llm_provider_state,
292
+ current_hf_endpoint_state,
293
+ current_google_api_key_state,
294
+ current_hf_api_key_state
295
+ ],
296
+ outputs=[output_zip_zip, last_processed_repo_path_state],
297
+ )
298
+
299
+ def handle_chat_submit(history, message, repo_path_state,
300
+ provider_state, hf_endpoint_s, google_api_key_s, hf_api_key_s): # Add provider states
301
+ if not message.strip(): return history, message
302
+
303
+ # Pass provider info to ask_agent
304
+ updated_history, _ = ask_agent(
305
+ history, message, repo_path_state,
306
+ llm_provider=provider_state,
307
+ hf_endpoint=hf_endpoint_s,
308
+ google_api_key=google_api_key_s,
309
+ hf_api_key=hf_api_key_s
310
+ )
311
+ return updated_history, ""
312
+
313
+ # Gather all necessary states for the chat handler
314
+ chat_inputs = [
315
+ chatbot, user_input_tb, last_processed_repo_path_state,
316
+ current_llm_provider_state, current_hf_endpoint_state,
317
+ current_google_api_key_state, current_hf_api_key_state
318
+ ]
319
+ user_input_tb.submit(fn=handle_chat_submit, inputs=chat_inputs, outputs=[chatbot, user_input_tb])
320
+ send_btn.click(fn=handle_chat_submit, inputs=chat_inputs, outputs=[chatbot, user_input_tb])
321
 
322
  if __name__ == "__main__":
323
+ from dotenv import load_dotenv
324
+ load_dotenv() # Load .env for defaults, UI can override
325
+ demo.queue().launch() # Removed share=True for local testing
ask_agent.py CHANGED
@@ -1,62 +1,128 @@
1
  import os
2
  import tempfile
3
  import zipfile
4
- import google.generativeai as genai
5
- from dotenv import load_dotenv
6
- load_dotenv()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- API_KEY = os.getenv("GOOGLE_API_KEY")
9
- genai.configure(api_key=API_KEY)
10
- model = genai.GenerativeModel("models/gemini-2.0-flash")
11
- chat_session = model.start_chat(history=[])
12
 
13
- def ask_agent(history, message, last_processed_repo_path):
14
 
15
  if not last_processed_repo_path or not os.path.exists(last_processed_repo_path):
16
- return history, "πŸ“‚ No repository has been processed yet. Please generate documentation first."
 
 
 
 
 
17
 
 
 
18
  with tempfile.TemporaryDirectory() as tmpdir:
19
- with zipfile.ZipFile(last_processed_repo_path, 'r') as zip_ref:
20
- zip_ref.extractall(tmpdir)
 
 
 
 
 
 
 
 
21
 
22
- # Extensions for docs and code to consider
23
  extensions_docs = [".md", ".txt"]
24
- extensions_code = [".py", ".js", ".java", ".ts", ".cpp", ".c", ".cs", ".go", ".rb", ".swift", ".php"]
25
 
26
  all_files = []
27
- for root, _, files in os.walk(tmpdir):
 
 
 
 
 
28
  for file in files:
29
- ext = os.path.splitext(file)[1].lower()
30
- if ext in extensions_docs or ext in extensions_code:
31
  all_files.append(os.path.join(root, file))
32
 
33
  if not all_files:
34
- return history, "πŸ“„ No documentation or code files found in the generated zip."
 
 
 
 
 
 
35
 
36
- # Read and concatenate content
37
- docs_and_code_content = ""
38
  for file_path in all_files:
 
 
 
39
  try:
40
- with open(file_path, "r", encoding="utf-8") as f:
41
- file_content = f.read()
42
- rel_path = os.path.relpath(file_path, tmpdir)
43
- docs_and_code_content += f"\n\n===== File: {rel_path} =====\n\n"
44
- docs_and_code_content += file_content
 
45
  except Exception as e:
46
- docs_and_code_content += f"\n\n===== Error reading file {file_path}: {str(e)} =====\n\n"
 
 
 
 
 
 
47
 
48
- prompt = (
49
- f"Here is the content of the project (documentation and code):\n\n{docs_and_code_content}\n\n"
50
- f"Question: {message}\n\nPlease respond clearly and precisely."
51
- )
52
 
53
- try:
54
- response = chat_session.send_message(prompt)
55
- answer = response.text
56
- except Exception as e:
57
- answer = f"❌ Error when calling Gemini: {str(e)}"
 
 
 
 
 
 
 
 
 
 
58
 
59
- history = history or []
60
- history.append((message, answer))
61
 
62
- return history, ""
 
1
  import os
2
  import tempfile
3
  import zipfile
4
+ from typing import List, Tuple # Retain for Gradio history if not switching to 'messages' type
5
+
6
+ from llm_interface import get_llm # Make sure this is imported
7
+
8
+ def ask_agent(gradio_history: List[Tuple[str, str]],
9
+ message: str,
10
+ last_processed_repo_path: str,
11
+ llm_provider: str = None,
12
+ hf_endpoint: str = None,
13
+ hf_api_key: str = None,
14
+ google_api_key: str = None):
15
+ """
16
+ Handles a user's question about a processed repository using a conversational LLM.
17
+
18
+ Args:
19
+ gradio_history: The chat history from the Gradio chatbot component.
20
+ message: The new message from the user.
21
+ last_processed_repo_path: Path to the zip file of the last processed repo.
22
+ llm_provider: The LLM provider chosen in the UI.
23
+ hf_endpoint: The Hugging Face endpoint URL, if chosen.
24
+ hf_api_key: The Hugging Face API key, if provided.
25
+ google_api_key: The Google API key, if chosen.
26
+
27
+ Returns:
28
+ A tuple containing the updated Gradio history and an empty string for the textbox.
29
+ """
30
+ # Get LLM instance with current provider settings from UI/env
31
+ llm = get_llm(provider=llm_provider,
32
+ hf_endpoint=hf_endpoint,
33
+ hf_api_key=hf_api_key,
34
+ google_api_key=google_api_key)
35
+
36
+ if not message or not message.strip():
37
+ gradio_history.append((message, "Please enter a question."))
38
+ return gradio_history, ""
39
 
 
 
 
 
40
 
 
41
 
42
  if not last_processed_repo_path or not os.path.exists(last_processed_repo_path):
43
+ gradio_history.append((message, "πŸ“‚ No repository has been processed yet. Please generate documentation for a repository first on the other tabs."))
44
+ return gradio_history, ""
45
+
46
+ if not zipfile.is_zipfile(last_processed_repo_path):
47
+ gradio_history.append((message, f"❌ The stored path '{last_processed_repo_path}' is not a valid .zip file. Please re-process a repository."))
48
+ return gradio_history, ""
49
 
50
+
51
+ docs_and_code_content = ""
52
  with tempfile.TemporaryDirectory() as tmpdir:
53
+ try:
54
+ with zipfile.ZipFile(last_processed_repo_path, 'r') as zip_ref:
55
+ zip_ref.extractall(tmpdir)
56
+ except zipfile.BadZipFile:
57
+ gradio_history.append((message, "❌ The processed repository file seems corrupted. Please re-process a repository."))
58
+ return gradio_history, ""
59
+ except Exception as e:
60
+ gradio_history.append((message, f"❌ Error extracting the repository: {e}. Please re-process."))
61
+ return gradio_history, ""
62
+
63
 
 
64
  extensions_docs = [".md", ".txt"]
65
+ extensions_code = [".py", ".js", ".java", ".ts", ".cpp", ".c", ".cs", ".go", ".rb", ".swift", ".php", ".yml", ".yaml", ".json", ".xml", ".html", ".css", ".sh"]
66
 
67
  all_files = []
68
+ extracted_items = os.listdir(tmpdir)
69
+ repo_scan_root = tmpdir
70
+ if len(extracted_items) == 1 and os.path.isdir(os.path.join(tmpdir, extracted_items[0])):
71
+ repo_scan_root = os.path.join(tmpdir, extracted_items[0])
72
+
73
+ for root, _, files in os.walk(repo_scan_root):
74
  for file in files:
75
+ _, ext = os.path.splitext(file)
76
+ if ext.lower() in extensions_docs or ext.lower() in extensions_code:
77
  all_files.append(os.path.join(root, file))
78
 
79
  if not all_files:
80
+ gradio_history.append((message, "πŸ“„ No relevant documentation or code files were found in the processed repository. It might be empty or contain unsupported file types."))
81
+ return gradio_history, ""
82
+
83
+ MAX_CONTENT_CHARS = 30000
84
+ current_chars = 0
85
+
86
+
87
 
 
 
88
  for file_path in all_files:
89
+ if current_chars >= MAX_CONTENT_CHARS:
90
+ docs_and_code_content += "\n\n===== [Content Truncated due to size limit] ====="
91
+ break
92
  try:
93
+ with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
94
+ file_content = f.read(MAX_CONTENT_CHARS - current_chars)
95
+ rel_path = os.path.relpath(file_path, repo_scan_root)
96
+ content_to_add = f"\n\n===== File: {rel_path} =====\n\n{file_content}"
97
+ docs_and_code_content += content_to_add
98
+ current_chars += len(content_to_add)
99
  except Exception as e:
100
+ error_msg = f"\n\n===== Error reading file {os.path.relpath(file_path, repo_scan_root)}: {e} =====\n\n"
101
+ docs_and_code_content += error_msg
102
+ current_chars += len(error_msg)
103
+
104
+ if not docs_and_code_content.strip():
105
+ gradio_history.append((message, "πŸ“„ Could not read content from any relevant files in the repository."))
106
+ return gradio_history, ""
107
 
 
 
 
 
108
 
109
+ current_turn_prompt = (
110
+ f"You are a helpful AI assistant, an expert in understanding code and project structures. "
111
+ f"Based ONLY on the following project content, answer the user's question. "
112
+ f"If the answer cannot be found in the provided content, say so. Do not invent information.\n\n"
113
+ f"--- Project Content ---\n{docs_and_code_content}\n--- End Project Content ---\n\n"
114
+ f"User Question: {message}\n\n"
115
+ f"Your Answer (be clear, concise, and stay strictly within the provided content):"
116
+ )
117
+
118
+ chat_session_obj = llm.start_chat_session(history=gradio_history)
119
+ # Check if starting the session itself failed (e.g., due to API key issues reported by get_llm/LLMInterface stubs)
120
+ if isinstance(chat_session_obj, str) and chat_session_obj.startswith("❌"):
121
+ # The error message from start_chat_session (or the stub) is the response
122
+ gradio_history.append((message, chat_session_obj))
123
+ return gradio_history, ""
124
 
125
+ answer = llm.send_chat_message(session=chat_session_obj, message=current_turn_prompt)
126
+ gradio_history.append((message, answer))
127
 
128
+ return gradio_history, ""
doc_generator.py CHANGED
@@ -1,141 +1,196 @@
1
- import google.generativeai as genai
2
- import re
3
- import os
4
  import ast
5
- from dotenv import load_dotenv
6
- import sys
7
  import importlib.util
 
 
 
 
 
 
 
 
8
 
9
- load_dotenv()
10
 
11
- API_KEY = os.getenv("GOOGLE_API_KEY")
12
- if API_KEY is None:
13
- raise ValueError("⚠️ The API key MY_API_KEY is missing! Check the Secrets in Hugging Face.")
14
- genai.configure(api_key=API_KEY)
15
- model = genai.GenerativeModel("models/gemini-2.0-flash")
16
 
17
  PROMPT = """You are an expert programming assistant.
18
- For the following code, perform the following actions:
19
- - The code must remain exactly the same
20
- - Add clear comments for each important step.
21
- - Rename variables if it makes the code easier to understand.
22
- - Add type annotations if the language supports it.
23
- - For each function, add a Google-style docstring (or equivalent format depending on the language).
24
-
25
- Respond only with the updated code, no explanation.
26
- Here is the code:
27
 
 
 
 
28
  {code}
29
  """
30
-
31
- def generate_documented_code(input_path: str, output_path: str) -> str:
 
32
  """
33
- Generate a documented version of the code from the given input file and save it to the output file.
34
-
35
  Args:
36
- input_path (str): Path to the original code file.
37
- output_path (str): Path where the documented code will be saved.
38
-
 
 
 
 
 
39
  Returns:
40
- str: The updated and documented code.
41
  """
42
- with open(input_path, "r", encoding="utf-8") as f:
43
- original_code = f.read()
 
 
 
44
 
45
- prompt = PROMPT.format(code=original_code)
46
- response = model.generate_content(prompt)
47
- updated_code = response.text.strip()
48
-
49
- # Clean up Markdown blocks if present
50
- lines = updated_code.splitlines()
51
- if len(lines) > 2:
52
- lines = lines[1:-1] # remove the first and last lines
53
- updated_code = "\n".join(lines)
54
- else:
55
- # if less than 3 lines, clear everything or keep as is depending on needs
56
- updated_code = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  with open(output_path, "w", encoding="utf-8") as output_file:
59
  output_file.write(updated_code)
60
 
61
  return updated_code
62
 
 
63
 
64
- def extract_imports_from_file(file_path):
65
- """
66
- Extract imported modules from a Python file to generate requirements.txt.
67
 
 
 
68
  Args:
69
- file_path (str): Path to the Python file.
70
 
71
  Returns:
72
- set: A set of imported module names.
73
  """
 
74
  try:
75
- with open(file_path, "r", encoding="utf-8") as f:
76
- tree = ast.parse(f.read())
77
- except SyntaxError:
78
- return set()
 
 
 
 
 
 
 
 
79
 
80
- imports = set()
81
  for node in ast.walk(tree):
82
  if isinstance(node, ast.Import):
83
  for alias in node.names:
84
- imports.add(alias.name.split('.')[0])
85
- elif isinstance(node, ast.ImportFrom):
86
- if node.module and not node.module.startswith("."):
87
  imports.add(node.module.split('.')[0])
88
  return imports
89
 
90
-
91
- def is_std_lib(module_name):
92
  """
93
- Check if a module is part of the Python standard library.
94
-
95
  Args:
96
- module_name (str): The name of the module.
97
 
98
  Returns:
99
- bool: True if the module is part of the standard library, False otherwise.
100
  """
 
 
101
  if module_name in sys.builtin_module_names:
102
  return True
103
- spec = importlib.util.find_spec(module_name)
104
- return spec is not None and "site-packages" not in (spec.origin or "")
105
-
106
-
107
- def generate_requirements_txt(base_path, output_path):
 
 
 
 
 
 
 
108
  """
109
- Generate a requirements.txt file based on external imports found in Python files.
 
 
110
 
111
  Args:
112
- base_path (str): Root directory of the codebase.
113
- output_path (str): Path to save the generated requirements.txt file.
114
  """
115
  all_imports = set()
116
  local_modules = set()
117
 
118
- # Get names of internal modules (i.e., .py files in the repo)
119
- for root, _, files in os.walk(base_path):
 
 
 
 
 
 
 
 
120
  for file in files:
121
  if file.endswith(".py"):
122
  module_name = os.path.splitext(file)[0]
123
  local_modules.add(module_name)
 
 
 
 
124
 
125
- # Extract all imports used in the project
126
- for root, _, files in os.walk(base_path):
127
  for file in files:
128
  if file.endswith(".py"):
129
  file_path = os.path.join(root, file)
130
  all_imports.update(extract_imports_from_file(file_path))
131
 
132
- # Remove internal modules and standard library modules
133
- external_imports = sorted([
134
  imp for imp in all_imports
135
- if imp not in local_modules and not is_std_lib(imp)
136
- ])
 
137
 
138
- # Write the requirements.txt file
139
  with open(output_path, "w", encoding="utf-8") as f:
140
- for package in external_imports:
141
- f.write(f"{package}\n")
 
 
 
 
 
 
 
1
  import ast
2
+
3
+
4
  import importlib.util
5
+ import os
6
+ import sys
7
+
8
+ from llm_interface import get_llm # Make sure this is imported
9
+
10
+
11
+
12
+
13
 
 
14
 
 
 
 
 
 
15
 
16
  PROMPT = """You are an expert programming assistant.
17
+ For the following Python code, perform the following actions:
18
+ - The code structure (classes, functions, arguments, order) must remain exactly the same.
19
+ - Add clear comments for each important step or complex logic.
20
+ - Add Google-style docstrings for all classes and functions. Docstrings should explain the purpose, arguments (with types if possible from context), and what the function/method returns (if anything, with type).
21
+ - Add type annotations for function arguments and return types. If a type is complex or unknown, use `typing.Any` or a descriptive placeholder if appropriate.
22
+
23
+ Respond ONLY with the updated Python code. Do not add any explanatory text before or after the code block.
24
+ Ensure the output is a single, valid Python code block.
 
25
 
26
+
27
+ Here is the code:
28
+ ```python
29
  {code}
30
  """
31
+ def generate_documented_code(input_path: str, output_path: str,
32
+ llm_provider: str = None, hf_endpoint: str = None,
33
+ hf_api_key: str = None, google_api_key: str = None) -> str:
34
  """
35
+ Generates documented code for a given file using the configured LLM.
 
36
  Args:
37
+ input_path: Path to the Python file to document.
38
+ output_path: Path where the documented Python file will be saved.
39
+ llm_provider: The LLM provider chosen in the UI (e.g., "GEMINI", "HUGGINGFACE").
40
+ hf_endpoint: The Hugging Face endpoint URL, if chosen.
41
+ hf_api_key: The Hugging Face API key, if provided.
42
+ google_api_key: The Google API key, if chosen.
43
+
44
+
45
  Returns:
46
+ The content of the documented code, or the original code/error message on failure.
47
  """
48
+ # Get LLM instance with current provider settings from UI/env
49
+ llm = get_llm(provider=llm_provider,
50
+ hf_endpoint=hf_endpoint,
51
+ hf_api_key=hf_api_key,
52
+ google_api_key=google_api_key)
53
 
54
+ try:
55
+ with open(input_path, "r", encoding="utf-8") as f:
56
+ original_code = f.read()
57
+ except Exception as e:
58
+ print(f"Error reading input file {input_path}: {e}")
59
+ error_content = f"# Error reading input file: {e}\n"
60
+ # Still write to output_path so the file exists for zipping, even if it's an error message
61
+ with open(output_path, "w", encoding="utf-8") as output_file:
62
+ output_file.write(error_content)
63
+ return error_content
64
+
65
+
66
+ if not original_code.strip():
67
+ with open(output_path, "w", encoding="utf-8") as output_file:
68
+ output_file.write("") # Write empty if original is empty
69
+ return ""
70
+
71
+ formatted_prompt = PROMPT.format(code=original_code)
72
+ updated_code = llm.generate_content(formatted_prompt) # Use the llm instance
73
+
74
+ # Check if LLM returned an error message or empty content
75
+ # The llm.generate_content itself should return "❌ ..." on failure
76
+ if updated_code.startswith("❌") or not updated_code.strip():
77
+ print(f"LLM failed to generate documented code for {input_path}. Using original code. LLM Output: {updated_code}")
78
+ # Fallback: write original code to output path if LLM fails significantly
79
+ with open(output_path, "w", encoding="utf-8") as output_file:
80
+ output_file.write(original_code)
81
+ # Return original code so the rest of the process can continue with undoc'd code
82
+ return original_code
83
 
84
  with open(output_path, "w", encoding="utf-8") as output_file:
85
  output_file.write(updated_code)
86
 
87
  return updated_code
88
 
89
+ def extract_imports_from_file(file_path: str) -> set:
90
 
 
 
 
91
 
92
+ """
93
+ Extracts imported module names from a Python file.
94
  Args:
95
+ file_path: The path to the Python file.
96
 
97
  Returns:
98
+ A set of top-level imported module names.
99
  """
100
+ imports = set()
101
  try:
102
+ with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
103
+ source_code = f.read()
104
+ if not source_code.strip():
105
+ return imports
106
+ try:
107
+ tree = ast.parse(source_code)
108
+ except SyntaxError:
109
+ return imports
110
+ except OSError:
111
+ return imports
112
+
113
+
114
 
 
115
  for node in ast.walk(tree):
116
  if isinstance(node, ast.Import):
117
  for alias in node.names:
 
 
 
118
  imports.add(node.module.split('.')[0])
119
  return imports
120
 
121
+ def is_std_lib(module_name: str) -> bool:
 
122
  """
123
+ Checks if a module name is part of the Python standard library.
 
124
  Args:
125
+ module_name: The name of the module.
126
 
127
  Returns:
128
+ True if the module is standard library, False otherwise.
129
  """
130
+ if not module_name:
131
+ return False
132
  if module_name in sys.builtin_module_names:
133
  return True
134
+ try:
135
+ spec = importlib.util.find_spec(module_name)
136
+ if spec and spec.origin:
137
+ origin_lower = spec.origin.lower()
138
+ # More robust check might be needed for all edge cases (e.g. frozen modules)
139
+ # but this covers common scenarios.
140
+ return "site-packages" not in origin_lower and "dist-packages" not in origin_lower
141
+ return False
142
+ except (ModuleNotFoundError, ImportError, AttributeError):
143
+ return False
144
+
145
+ def generate_requirements_txt(base_path: str, output_path: str):
146
  """
147
+ Generates a requirements.txt file by scanning Python files in a directory
148
+ for external (non-standard library, non-local) imports.
149
+ This function does NOT use the LLM.
150
 
151
  Args:
152
+ base_path: The root directory of the repository to scan.
153
+ output_path: The path where requirements.txt will be saved.
154
  """
155
  all_imports = set()
156
  local_modules = set()
157
 
158
+ ignore_dirs_set = {
159
+ '.git', '__pycache__', 'node_modules', 'venv', '.venv', 'env',
160
+ '.vscode', '.idea', 'build', 'dist', 'docs', 'tests', 'test',
161
+ 'examples', 'example', 'data', 'static', 'templates', 'assets', 'img', 'images', 'logs',
162
+ 'migrations', 'coverage'
163
+ }
164
+
165
+ for root, dirs, files in os.walk(base_path, topdown=True):
166
+ dirs[:] = [d for d in dirs if d not in ignore_dirs_set and not d.startswith('.')]
167
+
168
  for file in files:
169
  if file.endswith(".py"):
170
  module_name = os.path.splitext(file)[0]
171
  local_modules.add(module_name)
172
+ if file == "__init__.py":
173
+ package_name = os.path.basename(root)
174
+ if package_name and package_name not in ignore_dirs_set and not package_name.startswith('.'):
175
+ local_modules.add(package_name)
176
 
177
+ for root, dirs, files in os.walk(base_path, topdown=True):
178
+ dirs[:] = [d for d in dirs if d not in ignore_dirs_set and not d.startswith('.')]
179
  for file in files:
180
  if file.endswith(".py"):
181
  file_path = os.path.join(root, file)
182
  all_imports.update(extract_imports_from_file(file_path))
183
 
184
+ external_imports = sorted(list(set(
185
+
186
  imp for imp in all_imports
187
+ if imp and imp not in local_modules and not is_std_lib(imp)
188
+ )))
189
+
190
 
 
191
  with open(output_path, "w", encoding="utf-8") as f:
192
+ if not external_imports:
193
+ f.write("# No external Python dependencies found (or all are standard libraries/local modules).\n")
194
+ else:
195
+ for package in external_imports:
196
+ f.write(f"{package.lower()}\n")
index.md CHANGED
@@ -1,10 +1,14 @@
1
- πŸ“ repo/
 
 
2
  β”œβ”€β”€ .well-known/
3
- β”‚ β”œβ”€β”€ mcp.yaml
4
- β”œβ”€β”€ app.py ← Gradio + MCP server
 
5
  β”œβ”€β”€ doc_generator.py
 
 
6
  β”œβ”€β”€ mcp_server.py
7
- β”œβ”€β”€ readme_generator.py
8
- β”œβ”€β”€ requirements.txt
9
  β”œβ”€β”€ README.md
10
- └── index.md
 
 
1
+ πŸ“ AutoDocs_Project/
2
+ β”œβ”€β”€ .env.example
3
+ β”œβ”€β”€ .gitignore
4
  β”œβ”€β”€ .well-known/
5
+ β”‚ └── mcp.yaml
6
+ β”œβ”€β”€ app.py
7
+ β”œβ”€β”€ ask_agent.py
8
  β”œβ”€β”€ doc_generator.py
9
+ β”œβ”€β”€ index.md ← This file (placeholder in source, generated in output)
10
+ β”œβ”€β”€ llm_interface.py
11
  β”œβ”€β”€ mcp_server.py
 
 
12
  β”œβ”€β”€ README.md
13
+ β”œβ”€β”€ readme_generator.py
14
+ └── requirements.txt