reveseforward committed on
Commit 29b207e · 1 Parent(s): 6f7684a
Files changed (1)
  1. app.py +66 -37
app.py CHANGED
@@ -3,62 +3,93 @@ from transformers import AutoProcessor, AutoModelForVision2Seq
 from huggingface_hub import login
 import gradio as gr
 import os
+import gc
 
 # ----------------------------
 # AUTHENTICATION
 # ----------------------------
-# Option 1: Use HF token from environment variable (recommended for Spaces)
 HF_TOKEN = os.getenv("HF_TOKEN")
 if HF_TOKEN:
     login(token=HF_TOKEN)
 else:
-    # Option 2: Interactive login (for local testing)
     print("No HF_TOKEN found. Please log in manually.")
     login()
 
 # ----------------------------
 # CONFIG
 # ----------------------------
-MODEL_NAME = "reverseforward/inferencemodel"  # change this to your repo name
+MODEL_NAME = "reverseforward/inferencemodel"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-DTYPE = torch.float16  # use float16 on A10G
+DTYPE = torch.float16
+
+# Clear cache before loading
+gc.collect()
+if DEVICE == "cuda":
+    torch.cuda.empty_cache()
 
 # ----------------------------
-# LOAD MODEL
+# LOAD MODEL (with error handling)
 # ----------------------------
-print("Loading model...")
-model = AutoModelForVision2Seq.from_pretrained(
-    MODEL_NAME,
-    torch_dtype=DTYPE,
-    device_map="auto",
-    token=HF_TOKEN,  # Pass token explicitly
-)
-processor = AutoProcessor.from_pretrained(
-    MODEL_NAME,
-    token=HF_TOKEN,
-)
-print("Model loaded successfully.")
+print(f"Loading model on {DEVICE}...")
+try:
+    model = AutoModelForVision2Seq.from_pretrained(
+        MODEL_NAME,
+        torch_dtype=DTYPE,
+        device_map="auto",
+        token=HF_TOKEN,
+        low_cpu_mem_usage=True,  # Reduce memory usage
+    )
+    processor = AutoProcessor.from_pretrained(
+        MODEL_NAME,
+        token=HF_TOKEN,
+    )
+    print("✓ Model loaded successfully.")
+except Exception as e:
+    print(f"✗ Error loading model: {e}")
+    raise
 
 # ----------------------------
 # INFERENCE FUNCTION
 # ----------------------------
 def chat_with_image(image, text):
-    if image is None or text.strip() == "":
-        return "Please provide both an image and text input."
+    try:
+        if image is None or text.strip() == "":
+            return "Please provide both an image and text input."
 
-    # Prepare inputs for Qwen3-VL
-    inputs = processor(text=[text], images=[image], return_tensors="pt").to(DEVICE, DTYPE)
+        # Clear memory before inference
+        gc.collect()
+        if DEVICE == "cuda":
+            torch.cuda.empty_cache()
 
-    # Generate output
-    with torch.inference_mode():
-        generated_ids = model.generate(
-            **inputs,
-            max_new_tokens=256,
-            temperature=0.7,
-        )
+        # Prepare inputs
+        inputs = processor(
+            text=[text],
+            images=[image],
+            return_tensors="pt"
+        ).to(DEVICE, DTYPE)
 
-    output = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return output.strip()
+        # Generate output
+        with torch.inference_mode():
+            generated_ids = model.generate(
+                **inputs,
+                max_new_tokens=256,
+                temperature=0.7,
+                do_sample=True,
+            )
+
+        output = processor.batch_decode(
+            generated_ids,
+            skip_special_tokens=True
+        )[0]
+
+        # Clean up
+        del inputs, generated_ids
+        gc.collect()
+
+        return output.strip()
+
+    except Exception as e:
+        return f"Error during inference: {str(e)}"
 
 
 # ----------------------------
@@ -74,16 +105,14 @@ demo = gr.Interface(
     fn=chat_with_image,
     inputs=[
         gr.Image(type="pil", label="Upload Image"),
-        gr.Textbox(label="Enter Instruction or Question"),
+        gr.Textbox(label="Enter Instruction or Question", lines=3),
     ],
-    outputs=gr.Textbox(label="Model Output"),
+    outputs=gr.Textbox(label="Model Output", lines=5),
     title=title,
     description=description,
-    examples=[
-        ["examples/cat.jpg", "Describe this image."],
-        ["examples/room.jpg", "How many chairs are visible?"],
-    ],
+
+    allow_flagging="never",  # Disable flagging to reduce overhead
 )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(show_error=True)
 
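A quick way to exercise the new try/except paths outside the Space is a local smoke test. The sketch below is illustrative only and not part of this commit; it assumes HF_TOKEN is exported, that app.py is importable from the working directory (importing it triggers authentication and model loading), and that "sample.jpg" is a placeholder image path.

# Hypothetical smoke test for the updated chat_with_image() -- not part of
# this commit. Importing app runs authentication and loads the model, so an
# HF_TOKEN environment variable should already be set.
from PIL import Image

import app

# "sample.jpg" is a placeholder; substitute any local test image.
img = Image.open("sample.jpg")
print(app.chat_with_image(img, "Describe this image."))

# Empty input should return the validation message without running generate().
print(app.chat_with_image(None, ""))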