anaspro
committed on
Commit
·
5a13129
1
Parent(s):
69e6135
updatE
Browse files
app.py
CHANGED
|
@@ -6,23 +6,25 @@ from threading import Thread
|
|
| 6 |
|
| 7 |
import av
|
| 8 |
import gradio as gr
|
| 9 |
-
import spaces
|
| 10 |
import torch
|
| 11 |
-
from gradio.utils import get_upload_folder
|
| 12 |
from transformers import AutoModelForImageTextToText, AutoProcessor
|
| 13 |
from transformers.generation.streamers import TextIteratorStreamer
|
| 14 |
|
|
|
|
| 15 |
model_id = "unsloth/gemma-3n-E4B-it"
|
| 16 |
-
|
| 17 |
processor = AutoProcessor.from_pretrained(model_id)
|
| 18 |
-
model = AutoModelForImageTextToText.from_pretrained(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
|
|
|
| 20 |
IMAGE_FILE_TYPES = (".jpg", ".jpeg", ".png", ".webp")
|
| 21 |
VIDEO_FILE_TYPES = (".mp4", ".mov", ".webm")
|
| 22 |
AUDIO_FILE_TYPES = (".mp3", ".wav")
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
TARGET_FPS = int(os.getenv("TARGET_FPS", "3"))
|
| 27 |
MAX_FRAMES = int(os.getenv("MAX_FRAMES", "30"))
|
| 28 |
MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "10_000"))
|
|
@@ -118,7 +120,6 @@ def process_new_user_message(message: dict) -> list[dict]:
|
|
| 118 |
message["files"][0],
|
| 119 |
target_fps=TARGET_FPS,
|
| 120 |
max_frames=MAX_FRAMES,
|
| 121 |
-
parent_dir=GRADIO_TEMP_DIR,
|
| 122 |
)
|
| 123 |
paths = sorted(pathlib.Path(temp_dir).glob("*.jpg"))
|
| 124 |
return [
|
|
@@ -152,7 +153,6 @@ def process_history(history: list[dict]) -> list[dict]:
|
|
| 152 |
return messages
|
| 153 |
|
| 154 |
|
| 155 |
-
@spaces.GPU(duration=120)
|
| 156 |
@torch.inference_mode()
|
| 157 |
def generate(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
|
| 158 |
if not validate_media_constraints(message):
|
|
@@ -199,39 +199,14 @@ def generate(message: dict, history: list[dict], system_prompt: str = "", max_ne
|
|
| 199 |
yield output
|
| 200 |
|
| 201 |
|
|
|
|
| 202 |
examples = [
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
"files": [],
|
| 207 |
-
}
|
| 208 |
-
],
|
| 209 |
-
[
|
| 210 |
-
{
|
| 211 |
-
"text": "Describe this image in detail.",
|
| 212 |
-
"files": ["assets/cat.jpeg"],
|
| 213 |
-
}
|
| 214 |
-
],
|
| 215 |
-
[
|
| 216 |
-
{
|
| 217 |
-
"text": "Transcribe the following speech segment in English.",
|
| 218 |
-
"files": ["assets/speech.wav"],
|
| 219 |
-
}
|
| 220 |
-
],
|
| 221 |
-
[
|
| 222 |
-
{
|
| 223 |
-
"text": "Transcribe the following speech segment in English.",
|
| 224 |
-
"files": ["assets/speech2.wav"],
|
| 225 |
-
}
|
| 226 |
-
],
|
| 227 |
-
[
|
| 228 |
-
{
|
| 229 |
-
"text": "Describe this video",
|
| 230 |
-
"files": ["assets/holding_phone.mp4"],
|
| 231 |
-
}
|
| 232 |
-
],
|
| 233 |
]
|
| 234 |
|
|
|
|
| 235 |
demo = gr.ChatInterface(
|
| 236 |
fn=generate,
|
| 237 |
type="messages",
|
|
@@ -245,13 +220,9 @@ demo = gr.ChatInterface(
|
|
| 245 |
gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
|
| 246 |
gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
|
| 247 |
],
|
| 248 |
-
|
| 249 |
-
title="Gemma 3n E4B it",
|
| 250 |
examples=examples,
|
| 251 |
-
|
| 252 |
-
cache_examples=False,
|
| 253 |
-
css_paths="style.css",
|
| 254 |
-
delete_cache=(1800, 1800),
|
| 255 |
)
|
| 256 |
|
| 257 |
if __name__ == "__main__":
|
|
|
|
| 6 |
|
| 7 |
import av
|
| 8 |
import gradio as gr
|
|
|
|
| 9 |
import torch
|
|
|
|
| 10 |
from transformers import AutoModelForImageTextToText, AutoProcessor
|
| 11 |
from transformers.generation.streamers import TextIteratorStreamer
|
| 12 |
|
| 13 |
+
# Model configuration
|
| 14 |
model_id = "unsloth/gemma-3n-E4B-it"
|
|
|
|
| 15 |
processor = AutoProcessor.from_pretrained(model_id)
|
| 16 |
+
model = AutoModelForImageTextToText.from_pretrained(
|
| 17 |
+
model_id,
|
| 18 |
+
device_map="auto",
|
| 19 |
+
torch_dtype=torch.bfloat16
|
| 20 |
+
)
|
| 21 |
|
| 22 |
+
# Supported file types
|
| 23 |
IMAGE_FILE_TYPES = (".jpg", ".jpeg", ".png", ".webp")
|
| 24 |
VIDEO_FILE_TYPES = (".mp4", ".mov", ".webm")
|
| 25 |
AUDIO_FILE_TYPES = (".mp3", ".wav")
|
| 26 |
|
| 27 |
+
# Video processing settings
|
|
|
|
| 28 |
TARGET_FPS = int(os.getenv("TARGET_FPS", "3"))
|
| 29 |
MAX_FRAMES = int(os.getenv("MAX_FRAMES", "30"))
|
| 30 |
MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "10_000"))
|
|
|
|
| 120 |
message["files"][0],
|
| 121 |
target_fps=TARGET_FPS,
|
| 122 |
max_frames=MAX_FRAMES,
|
|
|
|
| 123 |
)
|
| 124 |
paths = sorted(pathlib.Path(temp_dir).glob("*.jpg"))
|
| 125 |
return [
|
|
|
|
| 153 |
return messages
|
| 154 |
|
| 155 |
|
|
|
|
| 156 |
@torch.inference_mode()
|
| 157 |
def generate(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
|
| 158 |
if not validate_media_constraints(message):
|
|
|
|
| 199 |
yield output
|
| 200 |
|
| 201 |
|
| 202 |
+
# Simple examples for the chat interface
|
| 203 |
examples = [
|
| 204 |
+
"What is the capital of France?",
|
| 205 |
+
"Explain quantum computing in simple terms",
|
| 206 |
+
"Write a short story about a robot learning to paint"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
]
|
| 208 |
|
| 209 |
+
# Create the chat interface
|
| 210 |
demo = gr.ChatInterface(
|
| 211 |
fn=generate,
|
| 212 |
type="messages",
|
|
|
|
| 220 |
gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
|
| 221 |
gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
|
| 222 |
],
|
| 223 |
+
title="Gemma 3n Multimodal Chat",
|
|
|
|
| 224 |
examples=examples,
|
| 225 |
+
stop_btn=False,
|
|
|
|
|
|
|
|
|
|
| 226 |
)
|
| 227 |
|
| 228 |
if __name__ == "__main__":
|