MultiAgent-System-for-Screenplay-Creation

Runtime error

App Files Files Community

luke9705 committed on Jun 4

Commit

5a4500e

verified ·

1 Parent(s): f16cdd3

Upload 2 files

Browse files

Files changed (2) hide show

app.py +145 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,145 @@

+import gradio as gr
+import os
+import pandas as pd
+from PIL import Image
+from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, VisitWebpageTool, OpenAIServerModel, tool
+from typing import Optional
+import requests
+from io import BytesIO
+import re
+from pathlib import Path
+import openai
+## utility functions
+def is_image_extension(filename: str) -> bool: # not used in the code, but useful to have
+    IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp', '.svg'}
+    ext = os.path.splitext(filename)[1].lower() # os.path.splitext(path) returns (root, ext)
+    return ext in IMAGE_EXTS
+def load_file(path: list) -> dict:
+    """Based on the file extension, load the file into a suitable object."""
+    image = None
+    excel = None
+    csv = None
+    text = None
+    ext = Path(path).suffix.lower() # same as os.path.splitext(filename)[1].lower()
+    print(f"ext: {ext}")
+    if ext.endswith(".png") or ext.endswith(".jpg") or ext.endswith(".jpeg"):
+        image = Image.open(path).convert("RGB")            # pillow object
+    elif ext.endswith(".xlsx") or ext.endswith(".xls"):
+        excel = pd.read_excel(path)                        # DataFrame
+    elif ext.endswith(".csv"):
+        csv = pd.read_csv(path)                          # DataFrame
+    elif ext.endswith(".py") or ext.endswith(".txt"):
+        with open(path, 'r') as f:
+            text = f.read()                               # plain text str
+    elif ext.endswith(".mp3") or ext.endswith(".wav"):
+        with open(path, 'wb') as f:
+            f.write("output.mp3")                         # binary data (leave it hardcoded for now)
+    return {"image" : image, "excel": excel, "csv": csv, "raw text": text}
+## tools definition
+@tool
+def download_images(image_urls: str) -> list:
+    """
+    Download web images from the given comma‐separated URLs and return them in a list of PIL Images.
+    Args:
+        image_urls: comma‐separated list of URLs to download
+    Returns:
+        List of PIL.Image.Image objects
+    """
+    urls = [u.strip() for u in image_urls.split(",") if u.strip()] # strip() removes whitespaces
+    images = []
+    for __, url in enumerate(urls, start=1): # enumerate seems not needed... keeping it for now
+        try:
+            # Fetch the image bytes
+            resp = requests.get(url, timeout=10)
+            resp.raise_for_status()
+            # Load into a PIL image
+            img = Image.open(BytesIO(resp.content)).convert("RGB")
+            images.append(img)
+        except Exception as e:
+            print(f"Failed to download from {url}: {e}")
+    return images
+@tool # since they gave us OpenAI API credits, we can keep using it
+def transcribe_audio() -> str:
+    """
+    Transcribe audio file using OpenAI Whisper API.
+    The path to the audio file is hardcoded as "output.mp3". Don't need to pass it as an argument.
+    Returns:
+        str: Transcription of the audio.
+    """
+    client = openai.Client(api_key=os.getenv("OPEN_AI_API_KEY"))
+    with open("output.mp3", "rb") as audio:                 # to modify path because it is arriving from gradio
+        transcript = client.audio.transcriptions.create(
+        file=audio,
+        model="whisper-1",
+        response_format="text",
+    )
+    print(transcript)
+    try:
+        return transcript
+    except Exception as e:
+        print(f"Error transcribing audio: {e}")
+## agent definition
+class Agent:
+    def __init__(self, ):
+        client = HfApiModel("google/gemma-3-27b-it", provider="nebius", api_key=os.getenv("NEBIUS_API_KEY"))
+        self.agent = CodeAgent(
+            model=client,
+            tools=[DuckDuckGoSearchTool(max_results=5), VisitWebpageTool(max_output_length=20000), download_images, transcribe_audio],
+            additional_authorized_imports=["pandas", "PIL", "io"],
+            planning_interval=1,
+            max_steps=5,
+        )
+        #self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"]
+        #print("System prompt:", self.agent.prompt_templates["system_prompt"])
+    def __call__(self, message: str, images: Optional[list[Image.Image]] = None, files: Optional[str] = None) -> str:
+        answer = self.agent.run(message, additional_args={"images": images ,"files": files})
+        return answer
+## gradio functions
+def respond(message, history):
+    text = message.get("text", "")
+    if not message.get("files"):
+        print("No files received.")
+        message = agent(text)
+    else:
+        files = message.get("files", [])
+        print(f"files received: {files}")
+        file = load_file(files[0])
+        message = agent(text, files=file)
+    return message
+def initialize_agent():
+    agent = Agent()
+    print("Agent initialized.")
+    return agent
+with gr.Blocks() as demo:
+    global agent
+    agent = initialize_agent()
+    gr.ChatInterface(
+                        fn=respond,
+                        type='messages',
+                        multimodal=True,
+                        title='MultiAgent_System_for_Screenplay_Creation_and_Editing',
+                        show_progress='full'
+                    )
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+huggingface_hub==0.25.2
+smolagents
+openai