Spaces:

phucndh
/

doc-chat-system

Build error

phucndh committed on Feb 24, 2025

Commit

04fffa4

1 Parent(s): ae39eb5

Start test v10

Files changed (1) hide show

app.py CHANGED Viewed

@@ -121,17 +121,35 @@ def answer_query(question):
     if not question.strip():
         return "Vui lòng nhập câu hỏi."
-    # Tạo prompt kết hợp tài liệu và câu hỏi
-    prompt = f"Dựa trên tài liệu sau đây:\n{document_content}\nHỏi: {question}\nTrả lời:"
     input_ids = tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)
-    outputs = model.generate(input_ids, max_length=150)
     answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
     if len(answer.split()) < 3:
-        return "Nội dung nằm ngoài tài liệu."
     return answer
 chat_interface = gr.Interface(
     fn=answer_query,
     inputs=gr.Textbox(lines=2, placeholder="Nhập câu hỏi của bạn..."),

     if not question.strip():
         return "Vui lòng nhập câu hỏi."
+    # Nếu nội dung tài liệu quá dài, giới hạn lại (ví dụ: chỉ lấy phần đầu)
+    if len(document_content) > 1000:
+        document_content = document_content[:1000] + "\n... (đã rút gọn)"
+    # Tạo prompt với chỉ dẫn rõ ràng
+    prompt = (
+        "Bạn là một chuyên gia pháp luật. Hãy trả lời câu hỏi dưới đây dựa trên nội dung tài liệu.\n\n"
+        "Nội dung tài liệu:\n"
+        f"{document_content}\n\n"
+        "Câu hỏi: {question}\n\n"
+        "Trả lời chi tiết:"
+    )
+    prompt = prompt.format(question=question)
     input_ids = tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)
+    # Sử dụng beam search để tạo ra câu trả lời mạch lạc hơn
+    outputs = model.generate(
+        input_ids,
+        max_length=200,
+        num_beams=5,
+        early_stopping=True
+    )
     answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
     if len(answer.split()) < 3:
+        return "Nội dung nằm ngoài tài liệu hoặc mô hình chưa được huấn luyện tốt."
     return answer
 chat_interface = gr.Interface(
     fn=answer_query,
     inputs=gr.Textbox(lines=2, placeholder="Nhập câu hỏi của bạn..."),