doodle-med committed on
Commit
a90bd60
·
verified ·
1 Parent(s): 99e0b55

Create App.py

Browse files
Files changed (1) hide show
  1. App.py +250 -0
App.py ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ # A production-quality, local, and uncensored text-editing agent
3
+ # that can read, reason over, and rewrite large documents.
4
+
5
+ import asyncio
6
+ import gradio as gr
7
+ import pathlib
8
+ import re
9
+ import textwrap
10
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
11
+ from transformers.agents import Agent, Tool
12
+ from fastmcp import FastMCP
13
+
14
# --- Configuration ---
# Use a more descriptive model name for clarity.
# NOTE(review): this is a GPTQ-quantized checkpoint — relevant to the
# quantization settings passed to from_pretrained() further below.
MODEL_ID = "NousResearch/Meta-Llama-3-8B-Instruct-GPTQ"
# Sandbox all file operations to this directory for security; every tool
# below resolves user-supplied paths against ROOT and rejects escapes.
ROOT = pathlib.Path("workspace")
ROOT.mkdir(exist_ok=True)  # Ensure the workspace directory exists

# --- 1. MCP Text-Editing Server (The "Tools" Backend) ---
# This server runs locally and provides the agent with tools to interact with files.
server = FastMCP("DocTools")
24
+
25
@server.tool()
def list_files(relative_path: str = ".") -> list[str]:
    """
    List the entries (files and directories) inside a workspace subdirectory.

    Args:
        relative_path (str): Subdirectory path relative to the workspace root.
            Defaults to '.', i.e. the workspace root itself.

    Returns:
        A list of entry names, or a single-element list with an error message.
    """
    try:
        # Resolve the target and refuse anything that escapes the sandbox
        # (directory-traversal protection).
        target = (ROOT / relative_path).resolve()
        if not target.is_relative_to(ROOT.resolve()):
            return ["Error: Access denied. Path is outside the workspace."]
        if not target.exists():
            return [f"Error: Directory '{relative_path}' not found."]
        return [entry.name for entry in target.iterdir()]
    except Exception as e:
        return [f"An error occurred: {str(e)}"]
45
+
46
@server.tool()
def search_in_file(file_path: str, pattern: str, max_hits: int = 40) -> list[str]:
    """
    Scan a workspace file for lines matching a case-insensitive regex.

    Args:
        file_path (str): File path relative to the workspace root.
        pattern (str): Regular expression to match (case-insensitive).
        max_hits (int): Upper bound on the number of matching lines returned.

    Returns:
        Matching lines formatted as "lineno: text", or an error/no-match message.
    """
    try:
        # Sandbox check: resolved path must stay inside the workspace.
        target = (ROOT / file_path).resolve()
        if not target.is_relative_to(ROOT.resolve()):
            return ["Error: Access denied. Path is outside the workspace."]
        if not target.is_file():
            return [f"Error: File '{file_path}' not found."]

        matcher = re.compile(pattern, re.IGNORECASE)
        hits = []
        with open(target, 'r', encoding='utf-8') as fh:
            # Stream line-by-line so large files never load fully into memory.
            for lineno, text in enumerate(fh, start=1):
                if matcher.search(text):
                    hits.append(f"{lineno}: {text.rstrip()}")
                    if len(hits) >= max_hits:
                        break
        return hits if hits else ["No matches found."]
    except Exception as e:
        return [f"An error occurred while reading the file: {str(e)}"]
75
+
76
@server.tool()
def read_lines(file_path: str, start_line: int, end_line: int) -> str:
    """
    Return the text of a 1-indexed, inclusive line range from a workspace file.

    Args:
        file_path (str): File path relative to the workspace root.
        start_line (int): First line to return (1-indexed).
        end_line (int): Last line to return (inclusive).

    Returns:
        The requested lines joined as one string, or an error message.
    """
    try:
        # Sandbox check: resolved path must stay inside the workspace.
        target = (ROOT / file_path).resolve()
        if not target.is_relative_to(ROOT.resolve()):
            return "Error: Access denied. Path is outside the workspace."
        if not target.is_file():
            return f"Error: File '{file_path}' not found."

        with open(target, 'r', encoding='utf-8') as fh:
            all_lines = fh.readlines()

        # Convert to 0-based indices, clamped to the file's bounds; an
        # out-of-range or inverted request simply yields an empty string.
        lo = max(0, start_line - 1)
        hi = min(len(all_lines), end_line)
        return "".join(all_lines[lo:hi])
    except Exception as e:
        return f"An error occurred: {str(e)}"
104
+
105
@server.tool()
def patch_file(file_path: str, start_line: int, end_line: int, new_content: str) -> str:
    """
    Replace an inclusive, 1-indexed range of lines in a workspace file.

    Args:
        file_path (str): File path relative to the workspace root.
        start_line (int): First line of the range to replace (1-indexed).
        end_line (int): Last line of the range to replace (inclusive).
        new_content (str): Replacement text; each of its lines is written
            with a trailing newline.

    Returns:
        A success message, or an error message describing what went wrong.
    """
    try:
        # Sandbox check: resolved path must stay inside the workspace.
        safe_path = (ROOT / file_path).resolve()
        if not safe_path.is_relative_to(ROOT.resolve()):
            return "Error: Access denied. Path is outside the workspace."
        if not safe_path.is_file():
            return f"Error: File '{file_path}' not found."

        # Bug fix: with an inverted range (start_line > end_line) the original
        # slicing `lines[:start_line-1] + new + lines[end_line:]` silently
        # DUPLICATED the lines between end_line and start_line-1. Reject the
        # request explicitly instead of corrupting the file.
        if start_line > end_line:
            return f"Error: start_line ({start_line}) must not exceed end_line ({end_line})."

        with open(safe_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        start_index = max(0, start_line - 1)

        # Build the new file content in memory, then write it back atomically
        # from the caller's perspective (single write pass).
        new_lines = (
            lines[:start_index]
            + [line + '\n' for line in new_content.splitlines()]
            + lines[end_line:]
        )

        with open(safe_path, 'w', encoding='utf-8') as f:
            f.writelines(new_lines)

        return f"Success: Patched lines {start_line}-{end_line} in '{file_path}'."
    except Exception as e:
        return f"An error occurred during patching: {str(e)}"
142
+
143
+
144
# --- 2. Local Function-Calling LLM ---
# Initialize the model and tokenizer for the agent.
# Using a GPTQ quantized model for efficient inference on GPUs.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",  # Let accelerate place weights across available devices
    # 8-bit quantization for a balance of speed and performance.
    # NOTE(review): recent transformers releases expect a quantization config
    # OBJECT (BitsAndBytesConfig / GPTQConfig), not a plain dict — and the
    # checkpoint above is already GPTQ-quantized, so this argument may be
    # rejected or redundant. Confirm against the installed transformers version.
    quantization_config={"bits": 8, "load_in_8bit": True}
)

# Create the pipeline for text generation with streaming capabilities.
llm_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,  # Essential for streaming and agent control
    max_new_tokens=1024,  # Cap per-step generation length
)
163
+
164
+ # --- 3. Transformers Agent Orchestrator ---
165
+ # This agent coordinates the LLM and the tools to accomplish user goals.
166
+
167
def build_hf_tool(mcp_tool_name: str) -> Tool:
    """Wrap a registered FastMCP tool as a Hugging Face agent ``Tool``.

    The tool's description and parameter schema are read from the MCP
    server's registry so both frameworks stay in sync automatically.
    """
    # Fetch the tool's metadata (description + parameter schema) once.
    schema = server.get_schema(mcp_tool_name)

    def _invoke(**kwargs):
        # Delegate the actual call back to the FastMCP server, which owns
        # validation and execution of the underlying function.
        return server.invoke(mcp_tool_name, **kwargs)

    return Tool(
        name=mcp_tool_name,
        description=schema["description"],
        inputs=schema["parameters"],
        function=_invoke,
    )
182
+
183
# Automatically build HF tools from all registered MCP server tools
tools = [build_hf_tool(tool_name) for tool_name in server.list_tools()]

# System prompt to define the agent's role and constraints
SYSTEM_PROMPT = textwrap.dedent("""
You are an expert technical editor and programmer.
Your task is to assist the user by performing file operations.
You have access to a set of tools for listing, searching, reading, and modifying files.
- All file paths are relative to the '/workspace' directory.
- Always verify file contents with `read_lines` or `search_in_file` before attempting to modify a file with `patch_file`.
- When you are done, provide a summary of the actions you have taken.
""")

# Initialize the agent with the LLM, tools, and a system prompt.
# memory=True enables conversational history.
# NOTE(review): the `transformers.agents` API has changed significantly
# between releases — confirm the installed version's Agent constructor
# accepts `llm_pipeline`, `max_steps`, and `memory` keyword arguments.
agent = Agent(
    llm_pipeline=llm_pipeline,
    tools=tools,
    system_prompt=SYSTEM_PROMPT,
    max_steps=10,  # Increased max steps for more complex tasks
    memory=True
)
205
+
206
+ # --- 4. Interactive Gradio Chat Application ---
207
+
208
async def chat_fn(history: list, user_message: str):
    """
    Drive one chat turn: forward the user message to the agent and stream
    the agent's replies back into the Gradio chatbot history.

    Args:
        history: Chatbot history as (user, assistant) message pairs.
        user_message: The newly submitted user prompt.

    Yields:
        The updated history each time the agent produces a user-visible
        text answer, so Gradio can render partial progress.
    """
    # Show the user's message immediately; the assistant slot fills in later.
    history.append((user_message, None))

    # Use astream for real-time streaming of thoughts and actions.
    # Intermediate steps are tool calls/thoughts; only plain strings are
    # treated as final, user-visible answers.
    async for step_output in agent.astream(user_message):
        if isinstance(step_output, str):
            history[-1] = (user_message, step_output)
            yield history
    # Bug fix: the original ended with `return history`, which is a
    # SyntaxError — an `async def` containing `yield` is an async generator,
    # and async generators forbid `return` with a value (PEP 525). Falling
    # off the end terminates the stream correctly.
222
+
223
# Build the Gradio UI: a chatbot bound to the streaming agent handler.
with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}") as demo:
    gr.Markdown("# Local Text-Editing Agent 📝")
    gr.Markdown(
        """
Chat with this AI agent to perform complex edits on text documents in the workspace.
**Example:** "List the files. Then, open `sample.txt`, summarize the second paragraph, and correct any passive-voice sentences you find."
"""
    )

    chatbot = gr.Chatbot(height=600)
    msg_textbox = gr.Textbox(label="Your Prompt", placeholder="Type your request here...")

    # Wire submit to the async streaming handler; yielded histories stream
    # into the chatbot. NOTE(review): the textbox is not cleared on submit.
    msg_textbox.submit(chat_fn, [chatbot, msg_textbox], chatbot)
    gr.ClearButton([msg_textbox, chatbot])

# Add a sample file to the workspace for easy testing
# NOTE(review): this overwrites sample.txt on every start and relies on the
# platform-default text encoding — consider opening with encoding="utf-8".
with open(ROOT / "sample.txt", "w") as f:
    f.write(textwrap.dedent("""
This is the first paragraph. It contains some basic information.

The second paragraph is where the interesting details are located. A decision was made by the team to proceed. This text will be reviewed by the agent for clarity and conciseness.

The final paragraph concludes the document.
"""))

# .queue() is essential for handling multiple users and streaming
# share=True creates a public link for easy sharing from Colab or locally.
demo.queue().launch(share=True)