fumiyaaa committed on
Commit
48db211
·
verified ·
1 Parent(s): 8d3d2a6

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -32
app.py CHANGED
@@ -3,6 +3,8 @@ import torch
3
  from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
4
  from PIL import Image
5
  import numpy as np
 
 
6
 
7
  # モデルとプロセッサの読み込み
8
  model_name = "Qwen/Qwen3-VL-4B-Instruct"
@@ -103,23 +105,126 @@ def transcribe_handwriting(image):
103
  return output_text[0] if output_text else "文字を認識できませんでした。"
104
 
105
 
106
def process_sketch(image_dict):
    """Normalize a gr.ImageEditor payload and run handwriting recognition.

    gr.ImageEditor emits a dict of the form
    {"background": ..., "layers": [...], "composite": ...}; a plain image
    value is forwarded unchanged.
    """
    if image_dict is None:
        return "手書きしてください。"

    image = image_dict
    if isinstance(image_dict, dict):
        # Prefer the flattened composite; fall back to the background layer.
        composite = image_dict.get("composite")
        image = composite if composite is not None else image_dict.get("background")

    return "手書きしてください。" if image is None else transcribe_handwriting(image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
 
125
  # Gradioインターフェースの構築
@@ -157,26 +262,27 @@ with gr.Blocks(title="手書き文字認識システム") as demo:
157
 
158
  with gr.Tab("手書き入力"):
159
  gr.Markdown("マウスやタッチで文字を書いてください。")
 
 
 
 
 
 
 
160
  with gr.Row():
161
- with gr.Column():
162
- sketch_input = gr.ImageEditor(
163
- label="手書きエリア",
164
- sources=(),
165
- brush=gr.Brush(colors=["#000000"], color_mode="fixed"),
166
- canvas_size=(600, 400),
167
- type="pil",
168
- )
169
- sketch_btn = gr.Button("文字を認識", variant="primary")
170
- with gr.Column():
171
- sketch_output = gr.Textbox(
172
- label="認識結果",
173
- lines=10,
174
- )
175
 
 
176
  sketch_btn.click(
177
- fn=process_sketch,
178
- inputs=sketch_input,
179
- outputs=sketch_output,
 
180
  )
181
 
182
  gr.Markdown(
 
3
  from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
4
  from PIL import Image
5
  import numpy as np
6
+ import base64
7
+ from io import BytesIO
8
 
9
  # モデルとプロセッサの読み込み
10
  model_name = "Qwen/Qwen3-VL-4B-Instruct"
 
105
  return output_text[0] if output_text else "文字を認識できませんでした。"
106
 
107
 
108
def process_canvas(base64_data):
    """Decode a base64 image from the HTML canvas and run handwriting OCR.

    Parameters
    ----------
    base64_data : str | None
        Either a raw base64 string or a full data URL
        ("data:image/png;base64,....") as produced by canvas.toDataURL().

    Returns
    -------
    str
        The recognition result, or a user-facing message when the input is
        empty or cannot be decoded.
    """
    # `not base64_data` already covers both None and "" — no separate
    # empty-string comparison needed.
    if not base64_data:
        return "手書きしてください。"

    try:
        # Drop a "data:image/png;base64," style prefix if present. Use
        # maxsplit=1 so the entire payload after the first comma is kept
        # (an unbounded split would truncate at any later comma).
        payload = base64_data.split(",", 1)[-1]

        image = Image.open(BytesIO(base64.b64decode(payload)))
        return transcribe_handwriting(image)
    except Exception as e:
        # Surface decode/inference failures to the UI instead of crashing.
        return f"エラーが発生しました: {str(e)}"
125
+
126
+
127
# Custom HTML canvas plus vanilla-JS drawing handlers (mouse and touch),
# rendered through gr.HTML. The clear button repaints the canvas white.
# NOTE(review): some Gradio versions sanitize or do not execute inline
# <script> tags inside gr.HTML — confirm the handlers actually run in the
# deployed Gradio version.
canvas_html = """
<div id="canvas-container" style="display: flex; flex-direction: column; align-items: center; gap: 10px;">
    <canvas id="sketch-canvas" width="600" height="400"
        style="border: 2px solid #333; background: white; cursor: crosshair; touch-action: none;"></canvas>
    <button id="clear-btn" type="button"
        style="padding: 8px 20px; background: #ff4444; color: white; border: none; border-radius: 5px; cursor: pointer;">
        クリア
    </button>
</div>
<script>
(function() {
    const canvas = document.getElementById('sketch-canvas');
    const ctx = canvas.getContext('2d');
    const clearBtn = document.getElementById('clear-btn');

    let isDrawing = false;
    let lastX = 0;
    let lastY = 0;

    // 初期化
    ctx.fillStyle = 'white';
    ctx.fillRect(0, 0, canvas.width, canvas.height);
    ctx.strokeStyle = '#000000';
    ctx.lineWidth = 3;
    ctx.lineCap = 'round';
    ctx.lineJoin = 'round';

    function getPos(e) {
        const rect = canvas.getBoundingClientRect();
        const scaleX = canvas.width / rect.width;
        const scaleY = canvas.height / rect.height;

        if (e.touches) {
            return {
                x: (e.touches[0].clientX - rect.left) * scaleX,
                y: (e.touches[0].clientY - rect.top) * scaleY
            };
        }
        return {
            x: (e.clientX - rect.left) * scaleX,
            y: (e.clientY - rect.top) * scaleY
        };
    }

    function startDrawing(e) {
        isDrawing = true;
        const pos = getPos(e);
        lastX = pos.x;
        lastY = pos.y;
        e.preventDefault();
    }

    function draw(e) {
        if (!isDrawing) return;
        e.preventDefault();

        const pos = getPos(e);
        ctx.beginPath();
        ctx.moveTo(lastX, lastY);
        ctx.lineTo(pos.x, pos.y);
        ctx.stroke();
        lastX = pos.x;
        lastY = pos.y;
    }

    function stopDrawing(e) {
        isDrawing = false;
        e.preventDefault();
    }

    // Mouse events
    canvas.addEventListener('mousedown', startDrawing);
    canvas.addEventListener('mousemove', draw);
    canvas.addEventListener('mouseup', stopDrawing);
    canvas.addEventListener('mouseout', stopDrawing);

    // Touch events
    canvas.addEventListener('touchstart', startDrawing);
    canvas.addEventListener('touchmove', draw);
    canvas.addEventListener('touchend', stopDrawing);

    // Clear button
    clearBtn.addEventListener('click', function() {
        ctx.fillStyle = 'white';
        ctx.fillRect(0, 0, canvas.width, canvas.height);
    });
})();
</script>
"""
217
+
218
# JS snippet passed to sketch_btn.click(js=...): reads the drawing canvas and
# returns its contents as a PNG data URL.
# NOTE(review): this relies on Gradio's `js=` hook substituting the returned
# string for the component input before the Python fn runs — confirm this
# behavior for the installed Gradio version.
get_canvas_js = """
async (current_value) => {
    const canvas = document.getElementById('sketch-canvas');
    if (canvas) {
        return canvas.toDataURL('image/png');
    }
    return '';
}
"""
228
 
229
 
230
  # Gradioインターフェースの構築
 
262
 
263
  with gr.Tab("手書き入力"):
264
  gr.Markdown("マウスやタッチで文字を書いてください。")
265
+
266
+ # カスタムCanvas
267
+ canvas = gr.HTML(canvas_html)
268
+
269
+ # 隠しテキストボックス(Canvas dataを受け取る)
270
+ canvas_data = gr.Textbox(visible=False, elem_id="canvas-data")
271
+
272
  with gr.Row():
273
+ sketch_btn = gr.Button("文字を認識", variant="primary")
274
+
275
+ sketch_output = gr.Textbox(
276
+ label="認識結果",
277
+ lines=10,
278
+ )
 
 
 
 
 
 
 
 
279
 
280
+ # ボタンクリック時にJSでcanvasデータを取得してから処理
281
  sketch_btn.click(
282
+ fn=process_canvas,
283
+ inputs=[canvas_data],
284
+ outputs=[sketch_output],
285
+ js=get_canvas_js,
286
  )
287
 
288
  gr.Markdown(