Spaces:

DivyanshHF
/

VisionLLM

Runtime error

App Files Community

DivyanshHF committed on Aug 10

Commit

89f55ad

verified ·

1 Parent(s): a709033

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -31

app.py CHANGED Viewed

@@ -1,49 +1,58 @@
 import os
 import sys
 import types
-import gradio as gr
 from PIL import Image
-# ======================
-#  Disable FlashAttention
-# ======================
-sys.modules["flash_attn"] = types.ModuleType("flash_attn")
-sys.modules["flash_attn.flash_attn_interface"] = types.ModuleType("flash_attn.flash_attn_interface")
 def _dummy_func(*args, **kwargs):
-    raise RuntimeError("FlashAttention is not available in this environment.")
-sys.modules["flash_attn.flash_attn_interface"].flash_attn_unpadded_qkvpacked_func = _dummy_func
-sys.modules["flash_attn.flash_attn_interface"].flash_attn_varlen_qkvpacked_func = _dummy_func
-# ======================
-#  CPU-only settings
-# ======================
-os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
 os.environ.setdefault("FLASH_ATTENTION", "0")
 os.environ.setdefault("XFORMERS_DISABLED", "1")
 os.environ.setdefault("ACCELERATE_USE_DEVICE_MAP", "0")
-# ======================
-#  VILA imports
-# ======================
 from llava.model.builder import load_pretrained_model
 from llava.constants import DEFAULT_IMAGE_TOKEN
 MODEL_PATH = "Efficient-Large-Model/VILA1.5-3b"
 tokenizer, model, image_processor, context_len = load_pretrained_model(
     MODEL_PATH, model_name="", model_base=None
 )
-# Add fallback chat template if missing
 if getattr(tokenizer, "chat_template", None) is None:
     tokenizer.chat_template = (
         "{% for message in messages %}{{ message['role'] | upper }}: "
         "{{ message['content'] }}\n{% endfor %}ASSISTANT:"
     )
-def vila_infer(image, prompt, max_new_tokens, temperature):
     if image is None:
         return "Please upload an image."
     if not prompt.strip():
@@ -51,7 +60,6 @@ def vila_infer(image, prompt, max_new_tokens, temperature):
     pil = Image.fromarray(image).convert("RGB")
-    # Minimal conversation: image + prompt
     out = model.generate_content(
         prompt=[{
             "from": "human",
@@ -60,25 +68,20 @@ def vila_infer(image, prompt, max_new_tokens, temperature):
                 {"type": "text", "value": prompt}
             ]
         }],
-        generation_config={"max_new_tokens": max_new_tokens, "temperature": temperature}
     )
     return str(out)
-with gr.Blocks(title="VILA 1.5 3B (CPU, HF Space)") as demo:
-    gr.Markdown("## 🖼️ VILA-1.5-3B — Image Captioning\nUpload an image and get a description.")
     with gr.Row():
         img = gr.Image(type="numpy", label="Image", height=320)
         prompt = gr.Textbox(label="Prompt", value="Please describe the image", lines=2)
-    with gr.Row():
-        max_new = gr.Slider(16, 256, value=96, step=1, label="Max new tokens")
-        temp = gr.Slider(0.0, 1.0, value=0.0, step=0.1, label="Temperature")
     btn = gr.Button("Run")
     out = gr.Textbox(label="Output", lines=8)
-    btn.click(vila_infer, [img, prompt, max_new, temp], out)
 demo.launch()

 import os
 import sys
 import types
+import importlib.machinery
 from PIL import Image
+import gradio as gr
+# ===============================
+# Patch flash_attn for CPU runtime
+# ===============================
+dummy_flash_attn = types.ModuleType("flash_attn")
+dummy_flash_attn.__spec__ = importlib.machinery.ModuleSpec("flash_attn", loader=None)
+dummy_interface = types.ModuleType("flash_attn.flash_attn_interface")
+dummy_interface.__spec__ = importlib.machinery.ModuleSpec(
+    "flash_attn.flash_attn_interface", loader=None
+)
 def _dummy_func(*args, **kwargs):
+    raise RuntimeError("flash_attn is not available in this environment.")
+dummy_interface.flash_attn_unpadded_qkvpacked_func = _dummy_func
+dummy_interface.flash_attn_varlen_qkvpacked_func = _dummy_func
+sys.modules["flash_attn"] = dummy_flash_attn
+sys.modules["flash_attn.flash_attn_interface"] = dummy_interface
+# ===============================
+# Hugging Face model setup
+# ===============================
 os.environ.setdefault("FLASH_ATTENTION", "0")
 os.environ.setdefault("XFORMERS_DISABLED", "1")
 os.environ.setdefault("ACCELERATE_USE_DEVICE_MAP", "0")
 from llava.model.builder import load_pretrained_model
 from llava.constants import DEFAULT_IMAGE_TOKEN
 MODEL_PATH = "Efficient-Large-Model/VILA1.5-3b"
+# Load model + tokenizer + image processor
 tokenizer, model, image_processor, context_len = load_pretrained_model(
     MODEL_PATH, model_name="", model_base=None
 )
+# Add a fallback chat template
 if getattr(tokenizer, "chat_template", None) is None:
     tokenizer.chat_template = (
         "{% for message in messages %}{{ message['role'] | upper }}: "
         "{{ message['content'] }}\n{% endfor %}ASSISTANT:"
     )
+# ===============================
+# Inference function
+# ===============================
+def vila_infer(image, prompt):
     if image is None:
         return "Please upload an image."
     if not prompt.strip():
     pil = Image.fromarray(image).convert("RGB")
     out = model.generate_content(
         prompt=[{
             "from": "human",
                 {"type": "text", "value": prompt}
             ]
         }],
+        generation_config=None
     )
     return str(out)
+# ===============================
+# Gradio UI
+# ===============================
+with gr.Blocks(title="VILA 1.5 3B (HF Space)") as demo:
+    gr.Markdown("## 🖼️ VILA-1.5-3B Image Description Demo\nUpload an image and get a description.")
     with gr.Row():
         img = gr.Image(type="numpy", label="Image", height=320)
         prompt = gr.Textbox(label="Prompt", value="Please describe the image", lines=2)
     btn = gr.Button("Run")
     out = gr.Textbox(label="Output", lines=8)
+    btn.click(vila_infer, [img, prompt], out)
 demo.launch()