knud055 committed on
Commit
305f7a9
·
verified ·
1 Parent(s): 4e4f00a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from openvino_genai import VLMPipeline
3
+ from PIL import Image
4
+ import numpy as np
5
+
6
# 1. Load the OpenVINO-optimized model (INT4 quantization for CPU speed).
# The repo below is the pre-converted OpenVINO build of Gemma 3 4B.
model_path = "OpenVINO/gemma-3-4b-it-int4-ov"
device = "CPU"

print("Loading model... this may take a moment.")

# Instantiate the vision-language pipeline once at module import so every
# request reuses the same loaded weights.
pipe = VLMPipeline(model_path, device)
14
# 2. Define the inference function
def generate_response(text_prompt, input_image=None):
    """Generate a model response for a text prompt, optionally with an image.

    Args:
        text_prompt: The user's text prompt.
        input_image: Optional PIL image for multimodal (vision) input.

    Returns:
        The pipeline's generation result, or an ``"Error: ..."`` string if
        generation raises (kept as a string so the Gradio Textbox can show it).
    """
    try:
        # Sampling configuration forwarded to pipe.generate as keyword args.
        config = {
            "max_new_tokens": 512,
            "do_sample": True,
            "temperature": 0.7,
            "top_p": 0.9,
        }

        if input_image is not None:
            # openvino_genai expects tensor-like image data, not a PIL.Image:
            # convert to an HWC uint8 RGB ndarray first (this is why numpy/PIL
            # are imported). NOTE(review): recent openvino_genai accepts
            # ndarrays directly; older builds may need ov.Tensor — confirm
            # against the pinned openvino_genai version.
            image_data = np.array(input_image.convert("RGB"), dtype=np.uint8)
            output = pipe.generate(text_prompt, image=image_data, **config)
        else:
            # Text-only mode.
            output = pipe.generate(text_prompt, **config)

        return output
    except Exception as e:
        # Boundary handler for the Gradio callback: surface the failure to the
        # UI instead of crashing the request.
        return f"Error: {str(e)}"
36
+
37
# 3. Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Gemma 3 4B - Discord Backend")

    # Prompt text and optional image sit side by side in one row.
    with gr.Row():
        prompt_box = gr.Textbox(label="Prompt")
        image_box = gr.Image(type="pil", label="Image (Optional)")

    response_box = gr.Textbox(label="Response")
    generate_btn = gr.Button("Generate")

    # Wire the button to the inference function.
    generate_btn.click(
        fn=generate_response,
        inputs=[prompt_box, image_box],
        outputs=response_box,
    )

# 4. Launch the app (the HTTP API is enabled automatically)
demo.launch()