Spaces:

ha7naa
/

SeeAndAsk

Sleeping

App Files Community

ha7naa committed on Feb 3

Commit

8fb5e72

verified ·

1 Parent(s): edd79f1

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -36

app.py CHANGED Viewed

@@ -10,67 +10,65 @@ from google.genai import types
 # 1. Gemini Client
 # ==============================
 client = genai.Client(
-    api_key=os.environ["GEMINI_API_KEY"]
 )
 # ==============================
-# 2. Safe Multimodal Function
 # ==============================
 def analyze_image(image, prompt):
-    # ---- SAFETY CHECKS ----
-    if image is None:
-        return "❌ Please upload an image."
-    if prompt is None or prompt.strip() == "":
-        return "❌ Please enter a text prompt."
-    # Convert image to bytes
-    buffer = io.BytesIO()
-    image.save(buffer, format="PNG")
-    image_bytes = buffer.getvalue()
-    # Create Gemini image part
-    image_part = types.Part.from_bytes(
-        data=image_bytes,
-        mime_type="image/png"
-    )
-    # Generate response
-    response = client.models.generate_content(
-        model="gemini-1.5-pro",
-        contents=[
-            prompt,
-            image_part
-        ]
-    )
-    return response.text
 # ==============================
-# 3. Gradio Interface
 # ==============================
 interface = gr.Interface(
     fn=analyze_image,
     inputs=[
         gr.Image(type="pil", label="Upload Image"),
-        gr.Textbox(
-            label="Prompt",
-            placeholder="Describe the image"
-        )
     ],
-    outputs=gr.Textbox(label="Gemini Response"),
-    title="Multimodal AI App (Gemini)",
-    description="Upload an image and ask a simple question."
 )
 # ==============================
 # 4. Launch
 # ==============================
-interface.launch()

 # 1. Gemini Client
 # ==============================
 client = genai.Client(
+    api_key=os.environ.get("GEMINI_API_KEY")
 )
 # ==============================
+# 2. Multimodal Function (SAFE)
 # ==============================
 def analyze_image(image, prompt):
+    try:
+        # ---- Checks ----
+        if image is None:
+            return "❌ No image uploaded"
+        if not prompt or prompt.strip() == "":
+            return "❌ Prompt is empty"
+        # Convert image to bytes
+        buffer = io.BytesIO()
+        image.save(buffer, format="PNG")
+        image_bytes = buffer.getvalue()
+        image_part = types.Part.from_bytes(
+            data=image_bytes,
+            mime_type="image/png"
+        )
+        # ✅ USE FLASH MODEL (IMPORTANT)
+        response = client.models.generate_content(
+            model="gemini-1.5-flash",
+            contents=[
+                prompt,
+                image_part
+            ]
+        )
+        return response.text
+    except Exception as e:
+        # 🔍 Show real Gemini error
+        return f"❌ Gemini Error:\n{str(e)}"
 # ==============================
+# 3. Gradio UI
 # ==============================
 interface = gr.Interface(
     fn=analyze_image,
     inputs=[
         gr.Image(type="pil", label="Upload Image"),
+        gr.Textbox(label="Prompt", value="Describe the image")
     ],
+    outputs=gr.Textbox(label="Response"),
+    title="Gemini Multimodal Test App",
 )
 # ==============================
 # 4. Launch
 # ==============================
+interface.launch(ssr_mode=False)