knud055 committed on
Commit
305f7a9
·
verified ·
1 Parent(s): 4e4f00a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from openvino_genai import VLMPipeline
3
+ from PIL import Image
4
+ import numpy as np
5
+
6
# 1. Load the OpenVINO-optimized model (INT4 quantization for CPU speed).
# The repo below is the pre-converted OpenVINO build of Gemma 3 4B.
model_path = "OpenVINO/gemma-3-4b-it-int4-ov"
device = "CPU"

print("Loading model... this may take a moment.")

# Instantiate the vision-language pipeline once at module import so every
# request reuses the same loaded weights.
pipe = VLMPipeline(model_path, device)
14
# 2. Define the inference function
def generate_response(text_prompt, input_image=None):
    """Generate a model response for a text prompt, optionally with an image.

    Args:
        text_prompt: The user's text prompt.
        input_image: Optional PIL image for multimodal (vision) input.

    Returns:
        The pipeline's generation result, or an ``"Error: ..."`` string if
        generation raises (kept as a string so the Gradio Textbox can show it).
    """
    try:
        # Sampling configuration forwarded to pipe.generate as keyword args.
        config = {
            "max_new_tokens": 512,
            "do_sample": True,
            "temperature": 0.7,
            "top_p": 0.9,
        }

        if input_image is not None:
            # openvino_genai expects tensor-like image data, not a PIL.Image:
            # convert to an HWC uint8 RGB ndarray first (this is why numpy/PIL
            # are imported). NOTE(review): recent openvino_genai accepts
            # ndarrays directly; older builds may need ov.Tensor — confirm
            # against the pinned openvino_genai version.
            image_data = np.array(input_image.convert("RGB"), dtype=np.uint8)
            output = pipe.generate(text_prompt, image=image_data, **config)
        else:
            # Text-only mode.
            output = pipe.generate(text_prompt, **config)

        return output
    except Exception as e:
        # Boundary handler for the Gradio callback: surface the failure to the
        # UI instead of crashing the request.
        return f"Error: {str(e)}"
36
+
37
# 3. Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Gemma 3 4B - Discord Backend")

    # Prompt text and optional image sit side by side in one row.
    with gr.Row():
        prompt_box = gr.Textbox(label="Prompt")
        image_box = gr.Image(type="pil", label="Image (Optional)")

    response_box = gr.Textbox(label="Response")
    generate_btn = gr.Button("Generate")

    # Wire the button to the inference function.
    generate_btn.click(
        fn=generate_response,
        inputs=[prompt_box, image_box],
        outputs=response_box,
    )

# 4. Launch the app (the HTTP API is enabled automatically)
demo.launch()