staghado committed on
Commit
111ff5f
·
verified ·
1 Parent(s): 2fcfad9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -67
app.py CHANGED
@@ -22,56 +22,29 @@ def image_to_base64(image):
22
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
23
 
24
 
25
- def respond(
26
- message,
27
- history: list[dict[str, str]],
28
- system_message,
29
- max_tokens,
30
- temperature,
31
- top_p,
32
- ):
33
  """
34
- Send messages (with optional images) to vLLM endpoint and stream the response.
35
  """
36
- messages = [{"role": "system", "content": system_message}]
 
37
 
38
- # Add conversation history
39
- for msg in history:
40
- messages.append(msg)
41
 
42
- # Process the current message - check if it contains an image
43
- if message and "files" in message and message["files"]:
44
- # Message has image(s)
45
- content = []
46
-
47
- # Add text if present
48
- if message.get("text", "").strip():
49
- content.append({"type": "text", "text": message["text"]})
50
-
51
- # Add all images
52
- for file_info in message["files"]:
53
- try:
54
- image = Image.open(file_info)
55
- b64_image = image_to_base64(image)
56
- content.append({
57
- "type": "image_url",
58
- "image_url": {"url": f"data:image/png;base64,{b64_image}"}
59
- })
60
- except Exception as e:
61
- print(f"Error processing image: {e}")
62
-
63
- messages.append({"role": "user", "content": content})
64
- else:
65
- # Text-only message
66
- text_content = message if isinstance(message, str) else message.get("text", "")
67
- messages.append({"role": "user", "content": text_content})
68
-
69
  payload = {
70
  "model": MODEL,
71
- "messages": messages,
72
- "max_tokens": max_tokens,
 
 
 
 
 
 
 
73
  "temperature": temperature,
74
- "top_p": top_p,
75
  "stream": True
76
  }
77
 
@@ -111,42 +84,61 @@ def respond(
111
 
112
 
113
  # Build the Gradio Interface
114
- with gr.Blocks(title="💬 Vision Chat", theme=gr.themes.Soft()) as demo:
115
  gr.Markdown(
116
  """
117
- # 💬 Vision-Enabled Chat Interface
118
  **💡 How to use:**
119
- 1. Type your message in the chat box
120
- 2. Optionally upload images by clicking the 📎 icon
121
- 3. Adjust parameters in the accordion below if needed
122
- 4. Press Enter or click Send
123
 
124
- The model can understand both text and images!
125
  """
126
  )
127
 
128
- chatbot = gr.ChatInterface(
129
- respond,
130
- type="messages",
131
- multimodal=True,
132
- additional_inputs=[
133
- gr.Textbox(
134
- value="You are a helpful AI assistant with vision capabilities. You can understand and analyze images.",
135
- label="System message"
136
- ),
137
- gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max new tokens"),
138
- gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
139
- gr.Slider(
140
  minimum=0.1,
141
  maximum=1.0,
142
- value=0.95,
143
  step=0.05,
144
- label="Top-p (nucleus sampling)",
145
- ),
146
- ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  )
148
 
149
- chatbot.render()
 
 
 
150
 
151
  gr.Markdown("""
152
  ---
 
22
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
23
 
24
 
25
+ def process_image(image, temperature):
 
 
 
 
 
 
 
26
  """
27
+ Send image to vLLM endpoint and stream the response.
28
  """
29
+ if image is None:
30
+ return "Please upload an image first."
31
 
32
+ # Convert image to base64
33
+ b64_image = image_to_base64(image)
 
34
 
35
+ # Build the payload with only image input (no text prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  payload = {
37
  "model": MODEL,
38
+ "messages": [
39
+ {
40
+ "role": "user",
41
+ "content": [
42
+ {"type": "text", "text": ""},
43
+ {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64_image}"}}
44
+ ]
45
+ }
46
+ ],
47
  "temperature": temperature,
 
48
  "stream": True
49
  }
50
 
 
84
 
85
 
86
  # Build the Gradio Interface
87
+ with gr.Blocks(title="📖 Image OCR", theme=gr.themes.Soft()) as demo:
88
  gr.Markdown(
89
  """
90
+ # 📖 Image to Text Extraction
91
  **💡 How to use:**
92
+ 1. Upload an image using the upload box
93
+ 2. Adjust temperature if needed
94
+ 3. Click "Extract Text" to process
 
95
 
96
+ The model will extract and format text from your image.
97
  """
98
  )
99
 
100
+ with gr.Row():
101
+ with gr.Column(scale=1):
102
+ image_input = gr.Image(
103
+ type="pil",
104
+ label="🖼️ Upload Image",
105
+ sources=["upload", "clipboard"]
106
+ )
107
+ temperature = gr.Slider(
 
 
 
 
108
  minimum=0.1,
109
  maximum=1.0,
110
+ value=0.15,
111
  step=0.05,
112
+ label="Temperature"
113
+ )
114
+ submit_btn = gr.Button("Extract Text", variant="primary")
115
+ clear_btn = gr.Button("Clear", variant="secondary")
116
+
117
+ with gr.Column(scale=2):
118
+ output_text = gr.Markdown(
119
+ label="📄 Extracted Text",
120
+ value="<div style='min-height: 400px; padding: 10px; border: 1px solid #e0e0e0; border-radius: 4px; background-color: #f9f9f9;'><em>Extracted text will appear here...</em></div>"
121
+ )
122
+
123
+ with gr.Row():
124
+ raw_output = gr.Textbox(
125
+ label="Raw Output",
126
+ placeholder="Raw text will appear here...",
127
+ lines=10,
128
+ show_copy_button=True
129
+ )
130
+
131
+ # Event handlers
132
+ submit_btn.click(
133
+ fn=lambda img, temp: (process_image(img, temp), process_image(img, temp)),
134
+ inputs=[image_input, temperature],
135
+ outputs=[output_text, raw_output]
136
  )
137
 
138
+ clear_btn.click(
139
+ fn=lambda: (None, "", ""),
140
+ outputs=[image_input, output_text, raw_output]
141
+ )
142
 
143
  gr.Markdown("""
144
  ---