akhaliq HF Staff committed on
Commit
45711b1
·
verified ·
1 Parent(s): 4eb07df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -63
app.py CHANGED
@@ -8,46 +8,32 @@ from loguru import logger
8
  import gradio as gr
9
  import spaces
10
 
11
- # Prefer local repo package over any site-installed "perceptron" (adjust if needed)
12
- REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
13
- if REPO_ROOT not in sys.path:
14
- sys.path.insert(0, REPO_ROOT)
15
-
16
- from perceptron.tensorstream import VisionType
17
- from perceptron.tensorstream.ops import tensor_stream_token_view, modality_mask
18
- from perceptron.pointing.parser import extract_points
19
-
20
- # Global model and processor
21
- model = None
22
- processor = None
23
- device = None
24
- dtype = None
25
- config = None
26
-
27
- def load_model():
28
- global model, processor, device, dtype, config
29
- hf_path = "PerceptronAI/Isaac-0.1"
30
- logger.info(f"Loading processor and config from HF checkpoint: {hf_path}")
31
- config = AutoConfig.from_pretrained(hf_path, trust_remote_code=True)
32
- tokenizer = AutoTokenizer.from_pretrained(hf_path, trust_remote_code=True, use_fast=False)
33
- processor = AutoProcessor.from_pretrained(hf_path, trust_remote_code=True)
34
- processor.tokenizer = tokenizer # Ensure tokenizer is set
35
-
36
- logger.info(f"Loading AutoModelForCausalLM from HF checkpoint: {hf_path}")
37
- model = AutoModelForCausalLM.from_pretrained(hf_path, trust_remote_code=True)
38
-
39
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
40
- dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
41
- model = model.to(device=device, dtype=dtype)
42
- model.eval()
43
-
44
- logger.info(f"Model loaded on {device} with dtype {dtype}")
45
-
46
- @spaces.GPU(duration=120)
47
- def init():
48
- if model is None:
49
- load_model()
50
- return "Model loaded successfully"
51
 
52
  def document_to_messages(document, vision_token="<image>"):
53
  messages = []
@@ -117,9 +103,6 @@ def visualize_predictions(generated_text, image, output_path="prediction.jpeg"):
117
 
118
  @spaces.GPU(duration=120)
119
  def generate_response(image, prompt):
120
- if model is None:
121
- return "Model not loaded. Click 'Load Model' first.", None
122
-
123
  document = [
124
  {"type": "text", "content": "<hint>BOX</hint>", "role": "user"},
125
  {"type": "image", "content": image, "role": "user"},
@@ -151,33 +134,61 @@ def generate_response(image, prompt):
151
  else:
152
  return generated_text, None
153
 
154
- with gr.Blocks(title="HuggingFace Perceptron Demo") as demo:
155
- gr.Markdown("# HuggingFace Perceptron Pipeline Demo")
 
 
 
 
 
 
 
156
  gr.Markdown("Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")
157
  gr.Markdown("""
158
- This demo shows how to use the Perceptron Isaac model for multimodal generation with text and images.
159
- Upload an image and provide a prompt to generate responses with bounding box visualizations.
160
  """)
161
 
162
  with gr.Row():
163
- load_btn = gr.Button("Load Model", variant="primary")
164
-
165
- image_input = gr.Image(type="filepath", label="Upload Image", sources=["upload", "webcam"])
166
- prompt_input = gr.Textbox(
167
- label="Prompt",
168
- value="Determine whether it is safe to cross the street. Look for signage and moving traffic.",
169
- lines=3,
170
- placeholder="Enter your prompt here..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  )
172
 
173
- with gr.Row():
174
- generate_btn = gr.Button("Generate Response", variant="primary")
175
-
176
- generated_text = gr.Textbox(label="Generated Text", lines=10)
177
- visualized_image = gr.Image(label="Visualized Predictions (with Bounding Boxes)")
178
-
179
- load_btn.click(init, outputs=gr.Textbox(value="Loading...", visible=False))
180
- generate_btn.click(generate_response, inputs=[image_input, prompt_input], outputs=[generated_text, visualized_image])
181
 
182
  if __name__ == "__main__":
183
  demo.launch()
 
8
  import gradio as gr
9
  import spaces
10
 
11
+ # Note: The perceptron package needs to be installed or included in the Space
12
+ try:
13
+ from perceptron.tensorstream import VisionType
14
+ from perceptron.tensorstream.ops import tensor_stream_token_view, modality_mask
15
+ from perceptron.pointing.parser import extract_points
16
+ except ImportError:
17
+ logger.error("perceptron package not found. Please ensure it's installed in your Hugging Face Space.")
18
+ raise
19
+
20
+ # Load model at startup
21
+ hf_path = "PerceptronAI/Isaac-0.1"
22
+ logger.info(f"Loading processor and config from HF checkpoint: {hf_path}")
23
+ config = AutoConfig.from_pretrained(hf_path, trust_remote_code=True)
24
+ tokenizer = AutoTokenizer.from_pretrained(hf_path, trust_remote_code=True, use_fast=False)
25
+ processor = AutoProcessor.from_pretrained(hf_path, trust_remote_code=True)
26
+ processor.tokenizer = tokenizer # Ensure tokenizer is set
27
+
28
+ logger.info(f"Loading AutoModelForCausalLM from HF checkpoint: {hf_path}")
29
+ model = AutoModelForCausalLM.from_pretrained(hf_path, trust_remote_code=True)
30
+
31
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
32
+ dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
33
+ model = model.to(device=device, dtype=dtype)
34
+ model.eval()
35
+
36
+ logger.info(f"Model loaded on {device} with dtype {dtype}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  def document_to_messages(document, vision_token="<image>"):
39
  messages = []
 
103
 
104
  @spaces.GPU(duration=120)
105
  def generate_response(image, prompt):
 
 
 
106
  document = [
107
  {"type": "text", "content": "<hint>BOX</hint>", "role": "user"},
108
  {"type": "image", "content": image, "role": "user"},
 
134
  else:
135
  return generated_text, None
136
 
137
+ # Example images and prompts
138
+ examples = [
139
+ ["examples/street_scene.jpg", "Determine whether it is safe to cross the street. Look for signage and moving traffic."],
140
+ ["examples/kitchen.jpg", "Identify all the appliances visible in this kitchen."],
141
+ ["examples/document.jpg", "Extract the main text content from this document."],
142
+ ]
143
+
144
+ with gr.Blocks(title="Perceptron Isaac Vision Model", theme=gr.themes.Soft()) as demo:
145
+ gr.Markdown("# 🔍 Perceptron Isaac Vision Model")
146
  gr.Markdown("Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")
147
  gr.Markdown("""
148
+ This demo showcases the Perceptron Isaac-0.1 model for multimodal understanding with bounding box visualization.
149
+ Upload an image and provide a prompt to analyze the image and see detected objects with bounding boxes.
150
  """)
151
 
152
  with gr.Row():
153
+ with gr.Column(scale=1):
154
+ image_input = gr.Image(
155
+ type="filepath",
156
+ label="Upload Image",
157
+ sources=["upload", "webcam", "clipboard"],
158
+ height=400
159
+ )
160
+ prompt_input = gr.Textbox(
161
+ label="Prompt",
162
+ value="Determine whether it is safe to cross the street. Look for signage and moving traffic.",
163
+ lines=3,
164
+ placeholder="Enter your prompt here..."
165
+ )
166
+ generate_btn = gr.Button("🚀 Generate Response", variant="primary", size="lg")
167
+
168
+ with gr.Column(scale=1):
169
+ visualized_image = gr.Image(
170
+ label="Visualized Predictions (with Bounding Boxes)",
171
+ height=400
172
+ )
173
+ generated_text = gr.Textbox(
174
+ label="Generated Text",
175
+ lines=10,
176
+ max_lines=20
177
+ )
178
+
179
+ gr.Examples(
180
+ examples=examples,
181
+ inputs=[image_input, prompt_input],
182
+ outputs=[generated_text, visualized_image],
183
+ fn=generate_response,
184
+ cache_examples=False
185
  )
186
 
187
+ generate_btn.click(
188
+ generate_response,
189
+ inputs=[image_input, prompt_input],
190
+ outputs=[generated_text, visualized_image]
191
+ )
 
 
 
192
 
193
  if __name__ == "__main__":
194
  demo.launch()