Update App.py
App.py
CHANGED
@@ -1,8 +1,7 @@
-# app.py
 # A production-quality, local, and uncensored text-editing agent
-#

-import asyncio
 import gradio as gr
 import pathlib
 import re
@@ -10,241 +9,167 @@ import textwrap
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from transformers.agents import Agent, Tool
 from fastmcp import FastMCP

 # --- Configuration ---
-# Use a more descriptive model name for clarity
 MODEL_ID = "NousResearch/Meta-Llama-3-8B-Instruct-GPTQ"
-# Sandbox all file operations to this directory for security
 ROOT = pathlib.Path("workspace")
-ROOT.mkdir(exist_ok=True)

 # --- 1. MCP Text-Editing Server (The "Tools" Backend) ---
-# This
 server = FastMCP("DocTools")

 @server.tool()
 def list_files(relative_path: str = ".") -> list[str]:
-    """
-    Lists all files and directories within a given subdirectory of the workspace.
-    Args:
-        relative_path (str): The subdirectory path relative to the workspace root.
-            Defaults to the current directory ('.').
-    """
     try:
-        # Security: Prevent directory traversal attacks
         safe_path = (ROOT / relative_path).resolve()
-        if not safe_path.is_relative_to(ROOT.resolve()):
-            return ["Error: Access denied."]
-
-        if not safe_path.exists():
-            return [f"Error: Directory '{relative_path}' not found."]
-
         return [p.name for p in safe_path.iterdir()]
-    except Exception as e:
-        return [f"An error occurred: {str(e)}"]

 @server.tool()
 def search_in_file(file_path: str, pattern: str, max_hits: int = 40) -> list[str]:
-    """
-    Searches for a regex pattern within a specified file in the workspace.
-    Args:
-        file_path (str): The path to the file relative to the workspace root.
-        pattern (str): The regular expression pattern to search for (case-insensitive).
-        max_hits (int): The maximum number of matching lines to return.
-    """
     try:
-        # Security: Resolve and check the file path
         safe_path = (ROOT / file_path).resolve()
-        if not safe_path.is_relative_to(ROOT.resolve()):
-            return ["Error: Access denied."]
-
-        if not safe_path.is_file():
-            return [f"Error: File '{file_path}' not found."]
-
-        output = []
-        regex = re.compile(pattern, re.IGNORECASE)
         with open(safe_path, 'r', encoding='utf-8') as f:
             for i, line in enumerate(f):
                 if regex.search(line):
                     output.append(f"{i+1}: {line.rstrip()}")
-                    if len(output) >= max_hits:
-                        break
         return output if output else ["No matches found."]
-    except Exception as e:
-        return [f"An error occurred while reading the file: {str(e)}"]

 @server.tool()
 def read_lines(file_path: str, start_line: int, end_line: int) -> str:
-    """
-    Reads and returns a specific range of lines from a file.
-    Args:
-        file_path (str): The path to the file relative to the workspace root.
-        start_line (int): The starting line number (1-indexed).
-        end_line (int): The ending line number (inclusive).
-    """
     try:
-        # Security: Resolve and check the file path
         safe_path = (ROOT / file_path).resolve()
-        if not safe_path.is_relative_to(ROOT.resolve()):
-            return "Error: Access denied."
-
-        if not safe_path.is_file():
-            return f"Error: File '{file_path}' not found."
-
-        with open(safe_path, 'r', encoding='utf-8') as f:
-            lines = f.readlines()
-
-        # Adjust for 0-based indexing and ensure bounds are valid
-        start_index = max(0, start_line - 1)
-        end_index = min(len(lines), end_line)
-
-        return "".join(lines[start_index:end_index])
-    except Exception as e:
-        return f"An error occurred: {str(e)}"

 @server.tool()
 def patch_file(file_path: str, start_line: int, end_line: int, new_content: str) -> str:
-    """
-    Replaces a range of lines in a file with new content.
-    Args:
-        file_path (str): The path to the file relative to the workspace root.
-        start_line (int): The starting line number for replacement (1-indexed).
-        end_line (int): The ending line number for replacement (inclusive).
-        new_content (str): The new text to insert.
-    """
     try:
-        # Security: Resolve and check the file path
         safe_path = (ROOT / file_path).resolve()
-        if not safe_path.is_relative_to(ROOT.resolve()):
-            return "Error: Access denied."

-        if not safe_path.is_file():
-            return f"Error: File '{file_path}' not found."
-
-        with open(safe_path, 'r', encoding='utf-8') as f:
-            lines = f.readlines()

-        # Replace the specified range of lines with the new content
-        new_lines = (
-            lines[:max(0, start_line - 1)] +
-            [line + '\n' for line in new_content.splitlines()] +
-            lines[end_line:]
         )

-        with open(safe_path, 'w', encoding='utf-8') as f:
-            f.writelines(new_lines)
-
-        return f"Success: Patched lines {start_line}-{end_line} in '{file_path}'."
-    except Exception as e:
-        return f"An error occurred: {str(e)}"

-# --- 2. Local Function-Calling LLM ---
-# Initialize the model and tokenizer for the agent.
-# Using a GPTQ quantized model for efficient inference on GPUs.
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    device_map="auto",
-    # 8-bit quantization for a balance of speed and performance.
-    quantization_config={"bits": 8, "load_in_8bit": True}
-)
-
-# Create the pipeline for text generation with streaming capabilities.
-llm_pipeline = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    return_full_text=False,  # Essential for streaming and agent control
-    max_new_tokens=1024,
-)
-
-# --- 3. Transformers Agent Orchestrator ---
-# This agent coordinates the LLM and the tools to accomplish user goals.
-
-def build_hf_tool(mcp_tool_name: str) -> Tool:
-    """Dynamically creates a Hugging Face Tool from a FastMCP tool's schema."""
-    schema = server.get_schema(mcp_tool_name)
-
-    # The actual function that the agent will call
-    def tool_function(**kwargs):
-        # The FastMCP server handles the invocation internally
-        return server.invoke(mcp_tool_name, **kwargs)
-
-    return Tool(
-        name=mcp_tool_name,
-        description=schema["description"],
-        inputs=schema["parameters"],
-        function=tool_function
-    )

-
-tools = [build_hf_tool(tool_name) for tool_name in server.list_tools()]
-
-# System prompt to define the agent's role and constraints
-SYSTEM_PROMPT = textwrap.dedent("""
-    You are an expert technical editor and programmer.
-    Your task is to assist the user by performing file operations.
-    You have access to a set of tools for listing, searching, reading, and modifying files.
-    - All file paths are relative to the '/workspace' directory.
-    - Always verify file contents with `read_lines` or `search_in_file` before attempting to modify a file with `patch_file`.
-    - When you are done, provide a summary of the actions you have taken.
-""")
-
-# Initialize the agent with the LLM, tools, and a system prompt.
-# memory=True enables conversational history.
-agent = Agent(
-    llm_pipeline=llm_pipeline,
-    tools=tools,
-    system_prompt=SYSTEM_PROMPT,
-    max_steps=10,  # Increased max steps for more complex tasks
-    memory=True
-)
-
-# --- 4. Interactive Gradio Chat Application ---
-
-async def chat_fn(history: list, user_message: str):
-    """
-    Handles the chat interaction, streaming the agent's response back to the UI.
-    """
-    history.append((user_message, None))
-
-    # Use astream for real-time streaming of thoughts and actions
-    async for step_output in agent.astream(user_message):
-        # The final output is a string, intermediate steps are tool calls/thoughts
-        if isinstance(step_output, str):
-            history[-1] = (user_message, step_output)
-            yield history

 with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}") as demo:
-    gr.Markdown("#
     gr.Markdown(
         """
         Chat with this AI agent to perform complex edits on text documents in the workspace.
-        **
         """
     )

-    chatbot = gr.Chatbot(height=600)
-    msg_textbox = gr.Textbox(label="Your Prompt", placeholder="Type your request here...")

 # Add a sample file to the workspace for easy testing
 with open(ROOT / "sample.txt", "w") as f:
-    f.write(textwrap.dedent("""
-        The final paragraph concludes the document.
-        """))
-
-# .queue() is essential for handling multiple users and streaming
-# share=True creates a public link for easy sharing from Colab or locally.
-demo.queue().launch(share=True)

+# app.py (ZeroGPU Version)
 # A production-quality, local, and uncensored text-editing agent
+# designed specifically for the Hugging Face ZeroGPU platform.

 import gradio as gr
 import pathlib
 import re
 import textwrap
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from transformers.agents import Agent, Tool
 from fastmcp import FastMCP
+from huggingface_hub import snapshot_download
+
+# --- Hugging Face Spaces GPU Decorator ---
+# This is the key to making the app work on ZeroGPU
+from spaces import GPU as spaces_GPU

 # --- Configuration ---
 MODEL_ID = "NousResearch/Meta-Llama-3-8B-Instruct-GPTQ"
 ROOT = pathlib.Path("workspace")
+ROOT.mkdir(exist_ok=True)

 # --- 1. MCP Text-Editing Server (The "Tools" Backend) ---
+# This part remains the same.
 server = FastMCP("DocTools")

+# (All your @server.tool() functions: list_files, search_in_file, read_lines, patch_file go here)
+# ... [Paste your tool functions here to keep the script self-contained] ...
 @server.tool()
 def list_files(relative_path: str = ".") -> list[str]:
+    """Lists all files and directories within a given subdirectory of the workspace."""
     try:
         safe_path = (ROOT / relative_path).resolve()
+        if not safe_path.is_relative_to(ROOT.resolve()): return ["Error: Access denied."]
+        if not safe_path.exists(): return [f"Error: Directory '{relative_path}' not found."]
         return [p.name for p in safe_path.iterdir()]
+    except Exception as e: return [f"An error occurred: {str(e)}"]

 @server.tool()
 def search_in_file(file_path: str, pattern: str, max_hits: int = 40) -> list[str]:
+    """Searches for a regex pattern within a specified file in the workspace."""
     try:
         safe_path = (ROOT / file_path).resolve()
+        if not safe_path.is_relative_to(ROOT.resolve()): return ["Error: Access denied."]
+        if not safe_path.is_file(): return [f"Error: File '{file_path}' not found."]
+        output, regex = [], re.compile(pattern, re.IGNORECASE)
         with open(safe_path, 'r', encoding='utf-8') as f:
             for i, line in enumerate(f):
                 if regex.search(line):
                     output.append(f"{i+1}: {line.rstrip()}")
+                    if len(output) >= max_hits: break
         return output if output else ["No matches found."]
+    except Exception as e: return [f"An error occurred: {str(e)}"]

 @server.tool()
 def read_lines(file_path: str, start_line: int, end_line: int) -> str:
+    """Reads and returns a specific range of lines from a file."""
     try:
         safe_path = (ROOT / file_path).resolve()
+        if not safe_path.is_relative_to(ROOT.resolve()): return "Error: Access denied."
+        if not safe_path.is_file(): return f"Error: File '{file_path}' not found."
+        with open(safe_path, 'r', encoding='utf-8') as f: lines = f.readlines()
+        return "".join(lines[max(0, start_line - 1):min(len(lines), end_line)])
+    except Exception as e: return f"An error occurred: {str(e)}"

 @server.tool()
 def patch_file(file_path: str, start_line: int, end_line: int, new_content: str) -> str:
+    """Replaces a range of lines in a file with new content."""
     try:
         safe_path = (ROOT / file_path).resolve()
+        if not safe_path.is_relative_to(ROOT.resolve()): return "Error: Access denied."
+        if not safe_path.is_file(): return f"Error: File '{file_path}' not found."
+        with open(safe_path, 'r', encoding='utf-8') as f: lines = f.readlines()
+        new_lines = (lines[:max(0, start_line - 1)] +
+                     [line + '\n' for line in new_content.splitlines()] +
+                     lines[end_line:])
+        with open(safe_path, 'w', encoding='utf-8') as f: f.writelines(new_lines)
+        return f"Success: Patched lines {start_line}-{end_line} in '{file_path}'."
+    except Exception as e: return f"An error occurred: {str(e)}"

+# --- 2. Agent and Model Loading (ZeroGPU compatible) ---
+
+# We initialize the agent as None. It will be created on the first user request.
+agent = None
+
+# This is our GPU-accelerated function.
+# It will load the model on the first run and cache it for subsequent calls.
+@spaces_GPU(duration=120)  # Request GPU for 120 seconds per call
+def get_agent():
+    """
+    Loads and caches the LLM agent. This function runs on a GPU.
+    """
+    global agent
+    if agent is None:
+        print("--- Loading model and agent for the first time ---")

+        # Download the model to a persistent cache
+        model_path = snapshot_download(MODEL_ID)
+
+        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained(
+            model_path,
+            device_map="auto",
+            torch_dtype="auto"  # Recommended for modern GPUs
         )

+        llm_pipeline = pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            return_full_text=False,
+            max_new_tokens=1024,
+        )

+        tools = [
+            Tool(
+                name=t,
+                description=server.get_schema(t)["description"],
+                inputs=server.get_schema(t)["parameters"],
+                # The t=t default pins each tool name to its own closure; a
+                # default parameter must come before **kwargs to be valid Python.
+                function=lambda t=t, **kwargs: server.invoke(t, **kwargs),
+            )
+            for t in server.list_tools()
+        ]

+        SYSTEM_PROMPT = textwrap.dedent("""
+            You are an expert technical editor. You must use your tools to answer the user's request.
+            All file paths are relative to the '/workspace' directory.
+            Always verify file contents with `read_lines` or `search_in_file` before patching.
+        """)

+        agent = Agent(
+            llm_pipeline=llm_pipeline,
+            tools=tools,
+            system_prompt=SYSTEM_PROMPT,
+            max_steps=10,
+            memory=True
+        )
+        print("--- Agent loaded successfully ---")
+    return agent
+
+
+# --- 3. Gradio Chat Application ---
+
+# Using gr.ChatInterface for a cleaner UI setup
 with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}") as demo:
+    gr.Markdown("# ZeroGPU Text-Editing Agent 📝")
     gr.Markdown(
         """
         Chat with this AI agent to perform complex edits on text documents in the workspace.
+        **Note:** The first request will have a delay as the model is loaded onto the GPU.
         """
     )

+    chatbot = gr.Chatbot(height=600, label="Agent Chat")

+    async def chat_interaction(message, history):
+        # 1. Get the agent (the first call triggers the GPU load)
+        current_agent = get_agent()
+
+        # 2. Stream the response; gr.ChatInterface expects the fn to yield
+        #    the assistant message itself, not a (textbox, history) tuple
+        response = ""
+        async for step in current_agent.astream(message):
+            if isinstance(step, str):
+                response = step
+                yield response
+
+    gr.ChatInterface(
+        fn=chat_interaction,
+        chatbot=chatbot,
+        fill_height=False
+    )

 # Add a sample file to the workspace for easy testing
 with open(ROOT / "sample.txt", "w") as f:
+    f.write("This is a sample file for testing the ZeroGPU agent.")
+
+demo.queue().launch()
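A note on the tool-building comprehension in the new version: the `t=t` default argument is load-bearing. Python closures bind names late, so without it every lambda would invoke whichever tool name the loop finished on; the original `lambda **kwargs, t=t:` ordering was also a syntax error, since `**kwargs` must come last. A minimal standalone demonstration of the difference:

    # Late binding: every closure reads t after the loop has finished
    getters = [lambda: t for t in ("list_files", "patch_file")]
    print([g() for g in getters])   # ['patch_file', 'patch_file']

    # A t=t default captures the current value at definition time
    getters = [lambda t=t: t for t in ("list_files", "patch_file")]
    print([g() for g in getters])   # ['list_files', 'patch_file']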
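Every tool applies the same sandboxing recipe: resolve the candidate path, then test containment with `pathlib.Path.is_relative_to` (Python 3.9+), so `..` traversal cannot escape the workspace. A self-contained sketch of just that check, separate from the app:

    from pathlib import Path

    ROOT = Path("workspace").resolve()

    def is_sandboxed(user_path: str) -> bool:
        # resolve() collapses ".." segments before the containment test
        return (ROOT / user_path).resolve().is_relative_to(ROOT)

    print(is_sandboxed("sample.txt"))        # True
    print(is_sandboxed("../../etc/passwd"))  # False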
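The imports in the new version imply roughly the following dependency set. The commit ships no requirements file, so this list is an assumption; `spaces` and `torch` come preinstalled on Hugging Face Spaces GPU hardware:

    # requirements.txt (assumed, unpinned)
    gradio
    transformers
    torch
    fastmcp
    huggingface_hub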