Spaces:

akhaliq
/

Isaac-0.1

Running on Zero

App Files Files Community

akhaliq HF Staff committed on Sep 21

Commit

45711b1

verified ·

1 Parent(s): 4eb07df

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -63

app.py CHANGED Viewed

@@ -8,46 +8,32 @@ from loguru import logger
 import gradio as gr
 import spaces
-# Prefer local repo package over any site-installed "perceptron" (adjust if needed)
-REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-if REPO_ROOT not in sys.path:
-    sys.path.insert(0, REPO_ROOT)
-from perceptron.tensorstream import VisionType
-from perceptron.tensorstream.ops import tensor_stream_token_view, modality_mask
-from perceptron.pointing.parser import extract_points
-# Global model and processor
-model = None
-processor = None
-device = None
-dtype = None
-config = None
-def load_model():
-    global model, processor, device, dtype, config
-    hf_path = "PerceptronAI/Isaac-0.1"
-    logger.info(f"Loading processor and config from HF checkpoint: {hf_path}")
-    config = AutoConfig.from_pretrained(hf_path, trust_remote_code=True)
-    tokenizer = AutoTokenizer.from_pretrained(hf_path, trust_remote_code=True, use_fast=False)
-    processor = AutoProcessor.from_pretrained(hf_path, trust_remote_code=True)
-    processor.tokenizer = tokenizer  # Ensure tokenizer is set
-    logger.info(f"Loading AutoModelForCausalLM from HF checkpoint: {hf_path}")
-    model = AutoModelForCausalLM.from_pretrained(hf_path, trust_remote_code=True)
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
-    model = model.to(device=device, dtype=dtype)
-    model.eval()
-    logger.info(f"Model loaded on {device} with dtype {dtype}")
-@spaces.GPU(duration=120)
-def init():
-    if model is None:
-        load_model()
-    return "Model loaded successfully"
 def document_to_messages(document, vision_token="<image>"):
     messages = []
@@ -117,9 +103,6 @@ def visualize_predictions(generated_text, image, output_path="prediction.jpeg"):
 @spaces.GPU(duration=120)
 def generate_response(image, prompt):
-    if model is None:
-        return "Model not loaded. Click 'Load Model' first.", None
     document = [
         {"type": "text", "content": "<hint>BOX</hint>", "role": "user"},
         {"type": "image", "content": image, "role": "user"},
@@ -151,33 +134,61 @@ def generate_response(image, prompt):
         else:
             return generated_text, None
-with gr.Blocks(title="HuggingFace Perceptron Demo") as demo:
-    gr.Markdown("# HuggingFace Perceptron Pipeline Demo")
     gr.Markdown("Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")
     gr.Markdown("""
-    This demo shows how to use the Perceptron Isaac model for multimodal generation with text and images.
-    Upload an image and provide a prompt to generate responses with bounding box visualizations.
     """)
     with gr.Row():
-        load_btn = gr.Button("Load Model", variant="primary")
-    image_input = gr.Image(type="filepath", label="Upload Image", sources=["upload", "webcam"])
-    prompt_input = gr.Textbox(
-        label="Prompt",
-        value="Determine whether it is safe to cross the street. Look for signage and moving traffic.",
-        lines=3,
-        placeholder="Enter your prompt here..."
     )
-    with gr.Row():
-        generate_btn = gr.Button("Generate Response", variant="primary")
-    generated_text = gr.Textbox(label="Generated Text", lines=10)
-    visualized_image = gr.Image(label="Visualized Predictions (with Bounding Boxes)")
-    load_btn.click(init, outputs=gr.Textbox(value="Loading...", visible=False))
-    generate_btn.click(generate_response, inputs=[image_input, prompt_input], outputs=[generated_text, visualized_image])
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 import spaces
+# Note: The perceptron package needs to be installed or included in the Space
+try:
+    from perceptron.tensorstream import VisionType
+    from perceptron.tensorstream.ops import tensor_stream_token_view, modality_mask
+    from perceptron.pointing.parser import extract_points
+except ImportError:
+    logger.error("perceptron package not found. Please ensure it's installed in your Hugging Face Space.")
+    raise
+# Load model at startup
+hf_path = "PerceptronAI/Isaac-0.1"
+logger.info(f"Loading processor and config from HF checkpoint: {hf_path}")
+config = AutoConfig.from_pretrained(hf_path, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(hf_path, trust_remote_code=True, use_fast=False)
+processor = AutoProcessor.from_pretrained(hf_path, trust_remote_code=True)
+processor.tokenizer = tokenizer  # Ensure tokenizer is set
+logger.info(f"Loading AutoModelForCausalLM from HF checkpoint: {hf_path}")
+model = AutoModelForCausalLM.from_pretrained(hf_path, trust_remote_code=True)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
+model = model.to(device=device, dtype=dtype)
+model.eval()
+logger.info(f"Model loaded on {device} with dtype {dtype}")
 def document_to_messages(document, vision_token="<image>"):
     messages = []
 @spaces.GPU(duration=120)
 def generate_response(image, prompt):
     document = [
         {"type": "text", "content": "<hint>BOX</hint>", "role": "user"},
         {"type": "image", "content": image, "role": "user"},
         else:
             return generated_text, None
+# Example images and prompts
+examples = [
+    ["examples/street_scene.jpg", "Determine whether it is safe to cross the street. Look for signage and moving traffic."],
+    ["examples/kitchen.jpg", "Identify all the appliances visible in this kitchen."],
+    ["examples/document.jpg", "Extract the main text content from this document."],
+]
+with gr.Blocks(title="Perceptron Isaac Vision Model", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🔍 Perceptron Isaac Vision Model")
     gr.Markdown("Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")
     gr.Markdown("""
+    This demo showcases the Perceptron Isaac-0.1 model for multimodal understanding with bounding box visualization.
+    Upload an image and provide a prompt to analyze the image and see detected objects with bounding boxes.
     """)
     with gr.Row():
+        with gr.Column(scale=1):
+            image_input = gr.Image(
+                type="filepath",
+                label="Upload Image",
+                sources=["upload", "webcam", "clipboard"],
+                height=400
+            )
+            prompt_input = gr.Textbox(
+                label="Prompt",
+                value="Determine whether it is safe to cross the street. Look for signage and moving traffic.",
+                lines=3,
+                placeholder="Enter your prompt here..."
+            )
+            generate_btn = gr.Button("🚀 Generate Response", variant="primary", size="lg")
+        with gr.Column(scale=1):
+            visualized_image = gr.Image(
+                label="Visualized Predictions (with Bounding Boxes)",
+                height=400
+            )
+            generated_text = gr.Textbox(
+                label="Generated Text",
+                lines=10,
+                max_lines=20
+            )
+    gr.Examples(
+        examples=examples,
+        inputs=[image_input, prompt_input],
+        outputs=[generated_text, visualized_image],
+        fn=generate_response,
+        cache_examples=False
     )
+    generate_btn.click(
+        generate_response,
+        inputs=[image_input, prompt_input],
+        outputs=[generated_text, visualized_image]
+    )
 if __name__ == "__main__":
     demo.launch()