Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import pipeline | |
| import pdfplumber | |
| import docx | |
| # 1. Load the pretrained SQuAD v2.0 question-answering model | |
| # Uses deepset/roberta-base-squad2, a standard model tuned for SQuAD v2.0; it is built once at module import and reused for every question | |
| qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2") | |
# 2. Document-reading helper
def extract_text(file):
    """Return the plain text contained in an uploaded document.

    The format is chosen from the file extension (compared
    case-insensitively): ``.pdf`` is read with pdfplumber, ``.docx`` with
    python-docx, and ``.txt`` is decoded as UTF-8.

    Args:
        file: ``None``, a filesystem path string, or an object that exposes
            the path as its ``.name`` attribute.

    Returns:
        The extracted text. An empty string is returned when *file* is
        ``None`` or the extension is not one of the supported formats.
    """
    if file is None:
        return ""

    # Accept both a bare path string and an upload object carrying ``.name``.
    file_path = file if isinstance(file, str) else file.name
    lowered = file_path.lower()

    # PDF
    if lowered.endswith('.pdf'):
        with pdfplumber.open(file_path) as pdf:
            # extract_text() may yield None for a page without a text layer
            # (e.g. a scanned image); treat that as an empty page instead of
            # failing on None + str.
            return "".join(
                (page.extract_text() or "") + "\n" for page in pdf.pages
            )

    # Word (.docx)
    if lowered.endswith('.docx'):
        doc = docx.Document(file_path)
        return "".join(para.text + "\n" for para in doc.paragraphs)

    # Plain text (.txt)
    if lowered.endswith('.txt'):
        # utf-8-sig drops a leading BOM if present; errors='replace' keeps a
        # stray invalid byte from aborting the whole read.
        with open(file_path, 'r', encoding='utf-8-sig', errors='replace') as f:
            return f.read()

    # Unsupported extension: nothing to extract.
    return ""
# 3. Main prediction logic
def predict(file, manual_context, question):
    """Answer *question* from an uploaded file or from pasted text.

    An uploaded file takes priority over the pasted text. The answer comes
    from the module-level ``qa_model`` pipeline.

    Args:
        file: Uploaded file (path string or object with ``.name``), or
            ``None`` when nothing was uploaded.
        manual_context: Text pasted by the user; ``None`` is treated as empty.
        question: The user's question; ``None`` is treated as empty.

    Returns:
        A user-facing message: a warning when input is missing, a
        "not found" notice when the model has no usable answer, the answer
        with its confidence score, or an error description if reading the
        file or running the model raised an exception.
    """
    import os  # local import: only needed to display the uploaded file's name

    # The UI may hand over None for an untouched field; normalise to "".
    question = question or ""
    manual_context = manual_context or ""

    # 1. The question must not be blank.
    if not question.strip():
        return "⚠️ 請輸入您想提問的問題。"

    # 2. At least one context source is required.
    if file is None and not manual_context.strip():
        return "⚠️ 請先提供文件內容(上傳檔案或是在文字框貼上內容)。"

    # File parsing and inference both sit inside the try so that a corrupt
    # document is reported as a message instead of an unhandled exception.
    try:
        if file is not None:
            # An uploaded file wins over pasted text.
            context = extract_text(file)
            file_path = file if isinstance(file, str) else file.name
            source_info = f"📝 來源:已偵測到上傳檔案 ({os.path.basename(file_path)})"
        else:
            context = manual_context
            source_info = "📝 來源:手動輸入的文本"

        # 3. Make sure some text was actually extracted.
        if not context.strip():
            return "⚠️ 無法從提供的來源中提取有效文字,請檢查檔案格式。"

        # 4. Run the model. handle_impossible_answer lets the pipeline return
        # an empty answer when "no answer" outranks every candidate span.
        result = qa_model(
            question=question,
            context=context,
            handle_impossible_answer=True,
        )

        # Unanswerable: empty answer, or confidence under the threshold.
        if not result['answer'].strip() or result['score'] < 0.01:
            return f"{source_info}\n\n❌ 抱歉,在提供的內容中找不到相關答案。"

        return (f"{source_info}\n"
                f"🎯 模型回答:{result['answer']}\n"
                f"📊 信心分數:{round(result['score'] * 100, 2)}%")
    except Exception as e:
        # UI boundary: surface the failure to the user rather than crash.
        return f"❌ 發生錯誤:{str(e)}"
# 4. Build the Gradio web interface
with gr.Blocks(title="Case Study: AI Document QA") as demo:
    gr.Markdown("# 📑 Case Study: 智慧文件問答系統")
    gr.Markdown("利用語言模型進行文件自動化讀取與問答。")

    with gr.Row():
        # Left column: inputs (document, pasted text, question, submit).
        with gr.Column():
            file_input = gr.File(
                label="1. 上傳文件 (PDF, Word, TXT)",
                # Only offer the formats the text extractor can parse.
                file_types=[".pdf", ".docx", ".txt"],
            )
            text_input = gr.Textbox(lines=8, label="或是在此貼上文件內容", placeholder="若已上傳文件則無需填寫此處...")
            question_input = gr.Textbox(lines=2, label="2. 輸入您的問題", placeholder="例如:這份文件的主要結論是什麼?")
            submit_btn = gr.Button("開始分析", variant="primary")

        # Right column: the model's answer.
        with gr.Column():
            answer_output = gr.Textbox(label="模型回答結果", lines=10)

    # Wire the button to the prediction function.
    submit_btn.click(
        fn=predict,
        inputs=[file_input, text_input, question_input],
        outputs=answer_output
    )

    gr.Markdown("---")
    gr.Markdown("💡 **提示:** 針對 SQuAD v2.0 資料集訓練的模型具備判斷『問題是否可回答』的能力。")

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()