Spaces:

uyen13
/

chatbot

Sleeping

App Files Files Community

uyen13 committed on May 13, 2025

Commit

3ca3d47

verified ·

1 Parent(s): 0b1e158

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -30

app.py CHANGED Viewed

@@ -48,94 +48,96 @@ def process_pdf(pdf_path):
     )
     texts = text_splitter.split_documents(documents)
-    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
     vectorstore = FAISS.from_documents(texts, embeddings)
     return vectorstore
 # Post-process a raw answer string and return the cleaned-up text:
 #   1. replace a fixed set of English phrases with Vietnamese equivalents,
 #   2. strip surrounding whitespace and upper-case the first character,
 #   3. prepend a Vietnamese notice when fewer than four whitespace-separated
 #      tokens remain.
 def postprocess_answer(answer):
     # English phrase -> Vietnamese replacement. str.replace is case-sensitive,
     # so only these exact lowercase spellings are substituted.
     replacements = {
-        "the context": "tài liệu",
-        "according to the document": "theo nội dung tài liệu",
-        "it is stated that": "trong tài liệu có đề cập rằng",
-        "the answer is": "câu trả lời là",
-        "based on the information": "dựa trên thông tin được cung cấp"
     }
-    for eng, vi in replacements.items():
-        answer = answer.replace(eng, vi)
     answer = answer.strip()
     # Guard against an empty string before indexing answer[0].
     if answer and len(answer) > 0:
         answer = answer[0].upper() + answer[1:]
     # Fewer than four whitespace-separated tokens is treated as a short answer.
     if len(answer.split()) < 4:
-        answer = "Thông tin này hiện chưa rõ ràng. " + answer
     return answer
-# Revise the prompt template to use 'question' instead of 'query'
-template = """Hãy trả lời câu hỏi dựa trên nội dung sau:
 {context}
-Câu hỏi: {question}
-Trả lời bằng tiếng Việt một cách tự nhiên:"""
 # Wrap the template text in a PromptTemplate that declares its two
 # placeholders, {context} and {question}.
 QA_PROMPT = PromptTemplate(
     template=template,
-    input_variables=["context", "question"]  # Changed to 'question'
 )
 # UI entry point (`st` is presumably Streamlit -- its import is not visible
 # here). Flow: configure the page, accept a PDF upload, write it to
 # "temp.pdf", build a vector store from it via process_pdf, load the LLM,
 # assemble a RetrievalQA chain, then answer the user's question and show the
 # post-processed answer together with excerpts of the source documents.
 def main():
-    st.set_page_config(page_title="Trợ lý PDF thông minh", page_icon="📘")
-    st.title("Trợ lý PDF thông minh 🤖")
-    st.markdown("Tải lên file PDF và đặt câu hỏi về nội dung tài liệu!")
-    uploaded_file = st.file_uploader("Chọn file PDF", type="pdf")
     if uploaded_file is not None:
         # Persist the upload to disk because process_pdf is given a file path.
         with open("temp.pdf", "wb") as f:
             f.write(uploaded_file.getbuffer())
-        with st.spinner("Đang phân tích tài liệu..."):
             vectorstore = process_pdf("temp.pdf")
         llm = load_llm()
-        # Add the input_key parameter so the variable name is mapped correctly
         qa_chain = RetrievalQA.from_chain_type(
             llm=llm,
             chain_type="stuff",
             # k=4 is forwarded as a search kwarg (presumably the number of
             # chunks retrieved per question -- confirm against the retriever).
             retriever=vectorstore.as_retriever(search_kwargs={"k": 4}),
             return_source_documents=True,
-            input_key="question",  # Add the input-key mapping
             chain_type_kwargs={
                 "prompt": QA_PROMPT,
                 "document_variable_name": "context"
             }
         )
-        query = st.text_input("Nhập câu hỏi của bạn về tài liệu:")
         if query:
-            with st.spinner("Đang tổng hợp câu trả lời..."):
                 try:
-                    # Pass the input as a dictionary with the correct key
-                    result = qa_chain({"question": query})  # Changed to 'question'
                     raw_answer = result["result"]
                     answer = postprocess_answer(raw_answer)
-                    st.markdown("### Câu trả lời")
                     st.success(answer)
-                    with st.expander("Xem chi tiết nguồn tham khảo"):
                         for i, doc in enumerate(result["source_documents"]):
-                            st.markdown(f"**Trích dẫn {i+1}:**")
                             # Show at most the first 500 characters; "..." is
                             # appended unconditionally.
                             st.info(doc.page_content[:500] + "...")
                 # Any failure inside the try block is reported in the UI
                 # instead of propagating.
                 except Exception as e:
-                    st.error(f"Lỗi: {str(e)}")
     else:
-        st.info("Vui lòng tải lên file PDF để bắt đầu.")
 # Call main() only when this file is executed directly as a script.
 if __name__ == "__main__":
     main()

     )
     texts = text_splitter.split_documents(documents)
+    # Sử dụng model embedding đa ngôn ngữ
+    embeddings = SentenceTransformerEmbeddings(model_name="paraphrase-multilingual-mpnet-base-v2")
     vectorstore = FAISS.from_documents(texts, embeddings)
     return vectorstore
 # Post-process a raw answer string and return the cleaned-up text:
 #   1. replace a fixed set of English phrases with Japanese equivalents,
 #   2. strip surrounding whitespace and upper-case the first character,
 #   3. prepend a Japanese notice when fewer than four whitespace-separated
 #      tokens remain.
 def postprocess_answer(answer):
+    # Replace phrases that read unnaturally in Japanese
     # str.replace is case-sensitive, so only these exact lowercase English
     # spellings are substituted.
     replacements = {
+        "the context": "ドキュメント",
+        "according to the document": "文書によりますと",
+        "it is stated that": "記載されている内容では",
+        "the answer is": "答えは",
+        "based on the information": "提供された情報に基づきますと"
     }
+    for eng, jp in replacements.items():
+        answer = answer.replace(eng, jp)
+    # Normalize the Japanese formatting
     answer = answer.strip()
     # Guard against an empty string before indexing answer[0].
     if answer and len(answer) > 0:
         answer = answer[0].upper() + answer[1:]
+    # Check for short answers
     # NOTE(review): split() counts whitespace-separated tokens. Japanese text
     # is normally written without spaces, so a full Japanese sentence would
     # presumably count as one token and receive the prefix below -- confirm,
     # and consider a character-count threshold instead.
     if len(answer.split()) < 4:
+        answer = "情報が不足しているようです。 " + answer
     return answer
+# Japanese prompt template
+template = """以下の内容に基づいて質問に自然な日本語で回答してください:
 {context}
+質問: {question}
+回答:"""
 # Wrap the template text in a PromptTemplate that declares its two
 # placeholders, {context} and {question}.
 QA_PROMPT = PromptTemplate(
     template=template,
+    input_variables=["context", "question"]
 )
 # UI entry point (`st` is presumably Streamlit -- its import is not visible
 # here). Flow: configure the page, accept a PDF upload, write it to
 # "temp.pdf", build a vector store from it via process_pdf, load the LLM,
 # assemble a RetrievalQA chain, then answer the user's question and show the
 # post-processed answer together with excerpts of the source documents.
 def main():
+    st.set_page_config(page_title="PDFアシスタント", page_icon="📘")
+    st.title("PDFアシスタント 🤖")
+    st.markdown("PDFファイルをアップロードして内容について質問してください")
+    uploaded_file = st.file_uploader("PDFファイルを選択", type="pdf")
     if uploaded_file is not None:
         # Persist the upload to disk because process_pdf is given a file path.
         # NOTE(review): every upload is written to the same relative path, so
         # sessions sharing a working directory would presumably overwrite each
         # other's file -- confirm, and consider a per-session tempfile.
         with open("temp.pdf", "wb") as f:
             f.write(uploaded_file.getbuffer())
+        with st.spinner("ドキュメントを分析中..."):
             vectorstore = process_pdf("temp.pdf")
         # NOTE(review): the vector store and LLM are rebuilt whenever this
         # branch executes; if the script reruns per interaction this repeats
         # the expensive work -- TODO confirm and consider caching.
         llm = load_llm()
         qa_chain = RetrievalQA.from_chain_type(
             llm=llm,
             chain_type="stuff",
             # k=4 is forwarded as a search kwarg (presumably the number of
             # chunks retrieved per question -- confirm against the retriever).
             retriever=vectorstore.as_retriever(search_kwargs={"k": 4}),
             return_source_documents=True,
+            input_key="question",
             chain_type_kwargs={
                 "prompt": QA_PROMPT,
                 "document_variable_name": "context"
             }
         )
+        query = st.text_input("ドキュメントに関する質問を入力:")
         if query:
+            with st.spinner("回答を生成中..."):
                 try:
+                    result = qa_chain({"question": query})
                     raw_answer = result["result"]
                     answer = postprocess_answer(raw_answer)
+                    st.markdown("### 回答")
                     st.success(answer)
+                    with st.expander("参考資料を表示"):
                         for i, doc in enumerate(result["source_documents"]):
+                            st.markdown(f"**引用 {i+1}:**")
                             # Show at most the first 500 characters; "..." is
                             # appended unconditionally.
                             st.info(doc.page_content[:500] + "...")
                 # Any failure inside the try block is reported in the UI
                 # instead of propagating.
                 except Exception as e:
+                    st.error(f"エラーが発生しました: {str(e)}")
     else:
+        st.info("PDFファイルをアップロードしてください")
 # Call main() only when this file is executed directly as a script.
 if __name__ == "__main__":
     main()