|
|
import gradio as gr |
|
|
import cv2 |
|
|
import numpy as np |
|
|
from PIL import Image |
|
|
from groq import Groq |
|
|
from linebot import LineBotApi |
|
|
from linebot.models import TextSendMessage |
|
|
import base64 |
|
|
import io |
|
|
import csv |
|
|
import pandas as pd |
|
|
from docx import Document |
|
|
import tempfile |
|
|
import os |
|
|
from datetime import datetime |
|
|
|
|
|
|
|
|
# Channel access token handed to LineBotApi when pushing messages.
# The LINE_CHANNEL_ACCESS_TOKEN environment variable takes precedence so a
# deployment can supply its own token without editing this file.
# SECURITY NOTE(review): the literal fallback below is a credential committed
# to source. It is kept only so existing setups keep working; rotate it and
# drop the fallback once the environment variable is configured.
LINE_CHANNEL_ACCESS_TOKEN = (
    os.environ.get('LINE_CHANNEL_ACCESS_TOKEN')
    or '3eAF/FtRlpUFGvYt4FPi2BlsysXgXWuqAXwPkd9dv5j9aS96M1u1IXNHwWojen0Lcf3NB2S/UdDvhpTLykvTrkb+mtbDQgRZpElqt/WNjT/z+86QeaEyyhExX6I8GhOEwSPdk5AICDZO1+GTfygC5gdB04t89/1O/w1cDnyilFU='
)
|
|
|
|
|
def preview_image(image, flip_horizontal=False):
    """Return the picture to show in the preview pane.

    :param image: image object to preview, or None when nothing is loaded.
    :param flip_horizontal: when true, return a left-right mirrored copy
        produced with ``image.transpose``.
    :return: None if no image was given, the mirrored copy if flipping was
        requested, otherwise the very same image object.
    """
    if image is None:
        return None
    if not flip_horizontal:
        return image
    return image.transpose(Image.FLIP_LEFT_RIGHT)
|
|
|
|
|
def encode_image(image):
    """Serialize a PIL image as JPEG and return the bytes base64-encoded.

    JPEG cannot store an alpha channel or a palette, so an image whose mode
    is neither ``RGB`` nor ``L`` (for example an RGBA or P image coming from
    an uploaded PNG) is converted to RGB first; otherwise ``save`` would
    raise and the whole OCR request would fail.

    :param image: PIL image to encode.
    :return: base64 text (ASCII ``str``) of the JPEG data.
    """
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")

    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
|
|
|
|
|
def send_line_message(user_id, message):
    """Push a text message to a LINE user.

    :param user_id: LINE user ID of the recipient; surrounding whitespace is
        ignored, and a missing or non-string value is treated as empty.
    :param message: text to send.
    :return: ``(ok, detail)`` tuple, where ``ok`` is True on success and
        ``detail`` is a human-readable status message. Failures are reported
        through the tuple; this function does not raise.
    """
    # Normalise once so the value that gets validated is the value that gets
    # sent (a pasted ID with a trailing space used to pass the check but was
    # then pushed with the whitespace still attached).
    recipient = user_id.strip() if isinstance(user_id, str) else ""
    if not recipient:
        return False, "LINE User ID 不能為空"

    try:
        line_bot_api = LineBotApi(LINE_CHANNEL_ACCESS_TOKEN)
        line_bot_api.push_message(recipient, TextSendMessage(text=message))
    except Exception as e:
        # Delivery is best-effort: surface the error text to the caller
        # instead of letting it abort the OCR flow.
        return False, f"發送訊息時發生錯誤: {str(e)}"

    return True, f"訊息已成功發送至 {recipient}"
|
|
|
|
|
def _format_line_report(content):
    """Wrap OCR text in the timestamped message layout that is sent to LINE."""
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    return (
        "📸 OCR 辨識結果\n"
        f"時間: {timestamp}\n"
        "\n"
        "辨識內容:\n"
        f"{content}\n"
        "\n"
        "---\n"
        "此訊息由 OCR 系統自動發送"
    )


def process_ocr_with_linebot(image, api_key, prompt, flip_horizontal, line_user_id, send_to_line):
    """Run OCR on an image through the Groq API and optionally push the text to LINE.

    :param image: PIL image to recognise, or None when nothing was provided.
    :param api_key: Groq API key; None/blank is rejected, surrounding
        whitespace is removed before use.
    :param prompt: instruction sent along with the image; a built-in default
        is used when it is None or blank.
    :param flip_horizontal: mirror the image left-right before recognition.
    :param line_user_id: LINE user ID that receives the result.
    :param send_to_line: when true, push the recognised text to LINE.
    :return: ``(text, csv_path, docx_path, line_status)``. On validation or
        processing failure ``text`` holds the error message and both paths
        are None. This function does not raise.
    """
    if image is None:
        return "請先拍照或上傳圖片", None, None, "未處理任何圖片"

    # Treat a missing value like an empty one, and drop the whitespace that
    # tends to come along with a pasted key so authentication does not fail.
    api_key = (api_key or "").strip()
    if not api_key:
        return "請輸入有效的 Groq API Key", None, None, "API Key 為空"

    if not (prompt or "").strip():
        prompt = "請幫我辨識拍照的文字和內容進行OCR辨識"

    try:
        if flip_horizontal:
            processed_image = image.transpose(Image.FLIP_LEFT_RIGHT)
            print("已手動翻轉圖片")
        else:
            processed_image = image
            print("使用原始圖片方向")

        # The image travels inline as a base64 data URL.
        base64_image = encode_image(processed_image)
        image_content = {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
        }

        client = Groq(api_key=api_key)
        completion = client.chat.completions.create(
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    image_content,
                ],
            }],
            temperature=1,
            max_completion_tokens=512,
            top_p=1,
            stream=False,
            stop=None,
        )

        # Guard against a response without text so the exporters below
        # always receive a string.
        content = completion.choices[0].message.content or ""

        csv_file = create_csv_output(content)
        docx_file = create_docx_output(content)

        line_status = "未發送到 Line Bot"
        if send_to_line:
            success, message = send_line_message(
                line_user_id, _format_line_report(content)
            )
            marker = "✅" if success else "❌"
            line_status = f"{marker} {message}"

        return content, csv_file, docx_file, line_status

    except Exception as e:
        # UI boundary: report the failure in the output widgets.
        error_msg = f"OCR 辨識發生錯誤: {str(e)}"
        return error_msg, None, None, f"❌ 處理失敗: {str(e)}"
|
|
|
|
|
def create_csv_output(content):
    """Write the OCR text to a temporary CSV file and return its path.

    The file holds a header row and one row with the current timestamp and
    the full text. When the text spans more than one line, a blank row and a
    second table follow, listing each non-blank line (stripped) with its
    1-based line number in the original text.

    :param content: recognised text.
    :return: path of the CSV file, or None if it could not be created. The
        file is not deleted automatically; a partially written file is
        removed when creation fails.
    """
    path = None
    try:
        # newline='' lets the csv module control line endings itself, as its
        # documentation requires; without it rows gain stray blank lines on
        # platforms that translate '\n'. utf-8-sig prefixes the file with a BOM.
        with tempfile.NamedTemporaryFile(
            mode='w',
            delete=False,
            suffix='.csv',
            encoding='utf-8-sig',
            newline='',
        ) as temp_file:
            path = temp_file.name
            writer = csv.writer(temp_file)
            writer.writerow(['時間戳記', 'OCR辨識結果'])
            writer.writerow([datetime.now().strftime('%Y-%m-%d %H:%M:%S'), content])

            lines = content.split('\n')
            if len(lines) > 1:
                writer.writerow([])
                writer.writerow(['行號', '內容'])
                writer.writerows(
                    [number, text.strip()]
                    for number, text in enumerate(lines, 1)
                    if text.strip()
                )
        return path

    except Exception as e:
        print(f"創建 CSV 檔案時發生錯誤: {e}")
        # Do not leave a half-written temp file behind.
        if path is not None:
            try:
                os.remove(path)
            except OSError:
                pass
        return None
|
|
|
|
|
def create_docx_output(content):
    """Write the OCR text to a temporary Word document and return its path.

    The document contains a title, the recognition time, and the full text.
    When the text has more than three lines, an extra section lists each
    non-blank line (stripped) prefixed with its 1-based line number.

    :param content: recognised text.
    :return: path of the .docx file, or None if it could not be created. The
        file is not deleted automatically; the reserved temp file is removed
        when creation fails.
    """
    path = None
    try:
        # Only reserve a unique .docx path here; the handle is closed right
        # away so that doc.save() can write to the file by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as temp_file:
            path = temp_file.name

        doc = Document()
        doc.add_heading('OCR 辨識結果', 0)

        doc.add_heading('辨識時間', level=1)
        doc.add_paragraph(datetime.now().strftime('%Y年%m月%d日 %H:%M:%S'))

        doc.add_heading('辨識內容', level=1)
        doc.add_paragraph(content)

        lines = content.split('\n')
        if len(lines) > 3:
            doc.add_heading('分行內容', level=1)
            for i, line in enumerate(lines, 1):
                stripped = line.strip()
                if stripped:
                    doc.add_paragraph(f"{i}. {stripped}")

        doc.save(path)
        return path

    except Exception as e:
        print(f"創建 DOCX 檔案時發生錯誤: {e}")
        # Do not leave the empty placeholder file behind.
        if path is not None:
            try:
                os.remove(path)
            except OSError:
                pass
        return None
|
|
|
|
|
def clear_inputs():
    """Return the reset values for the components bound to the clear button.

    The tuple is positional and follows the ``outputs`` list of
    ``clear_btn.click``: image, recognised text, API key, prompt,
    send-to-LINE checkbox, LINE status message.
    """
    no_image = None
    blank = ""
    unchecked = False
    return no_image, blank, blank, blank, unchecked, "已清除所有輸入"
|
|
|
|
|
|
|
|
# Gradio interface: inputs on the left, results on the right, usage notes
# below, followed by the event wiring.
with gr.Blocks(theme=gr.themes.Soft(), title="OCR + Line Bot 整合系統") as demo:
    gr.Markdown("# 📸 OCR 文字辨識 + Line Bot 整合系統")
    gr.Markdown("使用攝像頭拍照或上傳圖片,透過 AI 進行文字辨識,並可選擇性發送結果到 Line Bot")

    with gr.Row():
        # ---- Left column: inputs and settings ----
        with gr.Column(scale=1):
            gr.Markdown("### 📝 輸入設定")

            # Picture source: webcam capture or file upload, handed over as PIL.
            image_input = gr.Image(
                label="拍照或上傳圖片",
                type='pil',
                sources=['webcam', 'upload'],
            )

            # Groq credentials (masked input).
            api_key_input = gr.Textbox(
                label="Groq API Key",
                type="password",
                placeholder="請輸入您的 Groq API Key",
                value="",
            )

            # Instruction sent together with the image.
            prompt_input = gr.Textbox(
                label="辨識提示詞",
                lines=3,
                placeholder="輸入您想要的辨識提示...",
                value="請幫我辨識拍照的文字和內容進行OCR辨識,請盡可能詳細和準確地提取所有可見的文字內容。",
            )

            gr.Markdown("### 📱 Line Bot 設定")

            line_user_id_input = gr.Textbox(
                label="LINE User ID",
                info="填入您想要發送 OCR 結果的 Line 用戶 ID",
                placeholder="請輸入要發送訊息的 LINE User ID",
                value="U377f923cd08097b0a01116f8e942650b",
            )

            send_to_line_checkbox = gr.Checkbox(
                label="📤 發送結果到 Line Bot",
                info="勾選此選項將會把 OCR 辨識結果發送到指定的 Line 用戶",
                value=False,
            )

            # Manual left-right mirroring of the picture before recognition.
            flip_checkbox = gr.Checkbox(
                label="🔄 手動翻轉圖片(如果文字方向不對才勾選)",
                info="通常攝像頭拍照後圖片方向是正確的,只在文字顛倒時才勾選此選項",
                value=False,
            )

            # Read-only preview of the picture as it will be recognised.
            preview_image_output = gr.Image(
                label="圖片預覽(將要辨識的圖片)",
                interactive=False,
                type='pil',
            )

            with gr.Row():
                process_btn = gr.Button("🔍 開始辨識", variant="primary")
                clear_btn = gr.Button("🗑️ 清除", variant="secondary")

        # ---- Right column: results and downloads ----
        with gr.Column(scale=1):
            gr.Markdown("### 📊 辨識結果")

            text_output = gr.Textbox(
                label="辨識結果",
                placeholder="辨識結果將顯示在這裡...",
                max_lines=15,
                lines=10,
            )

            line_status_output = gr.Textbox(
                label="Line Bot 發送狀態",
                placeholder="Line Bot 發送狀態將顯示在這裡...",
                interactive=False,
                lines=2,
            )

            gr.Markdown("### 📁 下載檔案")
            with gr.Row():
                csv_download = gr.File(visible=True, label="下載 CSV 檔案")
                docx_download = gr.File(visible=True, label="下載 DOCX 檔案")

    # ---- Collapsible usage instructions ----
    with gr.Accordion("📖 使用說明", open=False):
        gr.Markdown("""
        ### 使用步驟:
        1. **獲取 API Key**:前往 [Groq官網](https://groq.com) 註冊並取得 API Key
        2. **設定 Line Bot**:如需發送結果到 Line,請確認 Line User ID 正確
        3. **拍照或上傳**:使用攝像頭拍照或上傳包含文字的圖片
        4. **檢查方向**:查看圖片預覽中的文字方向是否正確
        5. **必要時翻轉**:只有當文字看起來是顛倒的時候才勾選「手動翻轉圖片」
        6. **輸入設定**:
           - 在上方欄位輸入您的 Groq API Key
           - 確認 LINE User ID(如需發送到 Line)
           - 勾選「發送結果到 Line Bot」(如需要)
        7. **自訂提示詞**:可選擇性修改辨識提示詞以獲得更好的結果
        8. **開始辨識**:點擊「開始辨識」按鈕
        9. **查看結果**:
           - 辨識結果會顯示在文字區域
           - Line Bot 發送狀態會顯示在狀態欄
           - 可下載 CSV 和 DOCX 格式的結果檔案

        ### 支援功能:
        - 📷 即時攝像頭拍照
        - 🔄 攝像頭鏡像修正(解決左右相反問題)
        - 📤 圖片檔案上傳
        - 🤖 AI 文字辨識
        - 📱 Line Bot 整合(自動發送辨識結果)
        - 📄 CSV 格式輸出
        - 📝 Word 文檔輸出
        - 🔧 自訂提示詞

        ### Line Bot 設定說明:
        - **LINE User ID**: 可透過 Line Bot 取得用戶的 User ID
        - **發送格式**: 系統會自動格式化辨識結果並加上時間戳記
        - **發送狀態**: 會即時顯示發送成功或失敗的狀態
        """)

    # ---- Event wiring ----
    # Run OCR; argument order matches process_ocr_with_linebot's signature.
    ocr_inputs = [
        image_input,
        api_key_input,
        prompt_input,
        flip_checkbox,
        line_user_id_input,
        send_to_line_checkbox,
    ]
    ocr_outputs = [text_output, csv_download, docx_download, line_status_output]
    process_btn.click(fn=process_ocr_with_linebot, inputs=ocr_inputs, outputs=ocr_outputs)

    # Reset; output order matches the tuple returned by clear_inputs.
    clear_btn.click(
        fn=clear_inputs,
        outputs=[
            image_input,
            text_output,
            api_key_input,
            prompt_input,
            send_to_line_checkbox,
            line_status_output,
        ],
    )

    # Refresh the preview whenever the picture or the flip option changes.
    for preview_trigger in (image_input, flip_checkbox):
        preview_trigger.change(
            fn=preview_image,
            inputs=[image_input, flip_checkbox],
            outputs=preview_image_output,
        )
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Start the Gradio server only when this file is executed as a script.
    launch_options = {
        "share": True,             # request a Gradio share link
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": None,       # no fixed port; let Gradio choose
        "show_error": True,        # show handler errors in the UI
    }
    demo.launch(**launch_options)