Spaces:

Hellowish
/

AnswerText

Sleeping

App Files Files Community

Hellowish committed on Dec 17, 2025

Commit

2ddb6f3

verified ·

1 Parent(s): 908f774

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -18

app.py CHANGED Viewed

@@ -1,34 +1,84 @@
 import gradio as gr
 from transformers import pipeline
 # 1. 載入 SQuAD v2.0 預訓練模型
 qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
-# 2. 定義處理邏輯
-def predict(context, question):
     if not context or not question:
-        return "請輸入文件內容與問題。"
-    # 執行問答
     result = qa_model(question=question, context=context)
-    # 如果信心分數太低，回傳無法回答（SQuAD v2.0 特色）
     if result['score'] < 0.05:
-        return "抱歉，在文件中找不到相關答案。"
-    return result['answer']
-# 3. 建立 Gradio 網頁介面
-demo = gr.Interface(
-    fn=predict,
-    inputs=[
-        gr.Textbox(lines=10, label="Context (文件內容)", placeholder="請貼上文件內容..."),
-        gr.Textbox(lines=2, label="Question (提問)", placeholder="請問這份文件關於什麼？")
-    ],
-    outputs=gr.Textbox(label="Model Answer (模型回答)"),
-    title="Case Study: Document QA System",
-    description="根據提供的文本回答問題。"
-)
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 from transformers import pipeline
+import pdfplumber
+import docx
 # 1. 載入 SQuAD v2.0 預訓練模型
+# 使用 deepset/roberta-base-squad2，它是針對 v2.0 優化的標準模型
 qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
+# 2. 定義文件讀取函式
+def extract_text(file):
+    if file is None:
+        return ""
+    file_path = file.name
+    text = ""
+    # 處理 PDF
+    if file_path.endswith('.pdf'):
+        with pdfplumber.open(file_path) as pdf:
+            for page in pdf.pages:
+                text += page.extract_text() + "\n"
+    # 處理 Word (.docx)
+    elif file_path.endswith('.docx'):
+        doc = docx.Document(file_path)
+        for para in doc.paragraphs:
+            text += para.text + "\n"
+    # 處理純文字 (.txt)
+    elif file_path.endswith('.txt'):
+        with open(file_path, 'r', encoding='utf-8') as f:
+            text = f.read()
+    return text
+# 3. 定義主預測邏輯
+def predict(file, manual_context, question):
+    # 優先從上傳的文件提取內容，若無則使用手動輸入的內容
+    if file is not None:
+        context = extract_text(file)
+    else:
+        context = manual_context
     if not context or not question:
+        return "請先提供文件內容（上傳或貼上文字）並輸入提問。"
+    # 執行問答推理
+    # 加入 handle_impossible_answer=True 處理 SQuAD v2.0 特性
     result = qa_model(question=question, context=context)
+    # 信心門檻判斷
     if result['score'] < 0.05:
+        return "抱歉，在文件內容中找不到相關答案（模型信心程度較低）。"
+    return f"回答：{result['answer']}\n(信心分數: {round(result['score'], 4)})"
+# 4. Build the Gradio web interface
+# Components are declared inside nested Row/Column contexts; the left column
+# holds the inputs and the submit button, the right column holds the answer.
+with gr.Blocks(title="Case Study: AI Document QA") as demo:
+    gr.Markdown("# 📑 Case Study: 智慧文件問答系統")
+    gr.Markdown("利用語言模型進行文件自動化讀取與問答。")
+    with gr.Row():
+        with gr.Column():
+            # Inputs: an uploaded file, pasted text as the alternative,
+            # and the question itself.
+            file_input = gr.File(label="1. 上傳文件 (PDF, Word, TXT)")
+            text_input = gr.Textbox(lines=8, label="或是在此貼上文件內容", placeholder="若已上傳文件則無需填寫此處...")
+            question_input = gr.Textbox(lines=2, label="2. 輸入您的問題", placeholder="例如：這份文件的主要結論是什麼？")
+            submit_btn = gr.Button("開始分析", variant="primary")
+        with gr.Column():
+            answer_output = gr.Textbox(label="模型回答結果", lines=10)
+    # Wire the button: the input order must match predict's parameters
+    # (file, manual_context, question).
+    submit_btn.click(
+        fn=predict,
+        inputs=[file_input, text_input, question_input],
+        outputs=answer_output
+    )
+    gr.Markdown("---")
+    gr.Markdown("💡 **提示：** 針對 SQuAD v2.0 資料集訓練的模型具備判斷『問題是否可回答』的能力。")
 if __name__ == "__main__":
     demo.launch()