Spaces:

VietCat
/

RAGSample

Sleeping

App Files Files Community

RAGSample / rag_core /business.py

VietCat

fix log

f3fa5b1 10 months ago

raw

history blame contribute delete

3.48 kB

	import logging
	from rag_core.chunker import chunk_legal_text
	from rag_core.embedder import get_embedding
	from rag_core.retriever import Retriever
	from rag_core.llm import generate_answer
	from rag_core.utils import log_timed

	# Single module-level Retriever instance used by the functions below.
	retriever = Retriever()
	# Readiness flag: True when the freshly constructed retriever already
	# exposes a non-None ``index`` attribute.
	ready = retriever.index is not None

	# Emitted once at import time, after the retriever has been constructed.
	logging.info("📦 Khởi tạo retriever")

	def is_ready():
	    """Return the module-level ``ready`` flag.

	    The flag is initialised at import time from ``retriever.index`` and is
	    set to ``True`` by ``build_index()`` after a successful build.
	    """
	    return ready

	@log_timed("xây FAISS index")
	def build_index():
	    """Build the retriever index from the raw law text file.

	    Reads ``data/raw_law.txt`` as UTF-8, splits it with
	    ``chunk_legal_text`` and hands the chunks together with
	    ``get_embedding`` to ``retriever.build``. After a successful build the
	    module-level ``ready`` flag is set to ``True``.

	    Raises:
	        Exception: any failure is logged and then re-raised unchanged.
	    """
	    global ready
	    logging.info("🔄 Bắt đầu xây FAISS index từ file dữ liệu...")
	    try:
	        with open("data/raw_law.txt", encoding="utf-8") as law_file:
	            raw_text = law_file.read()
	        char_count = len(raw_text)
	        logging.info(f"📄 Đọc dữ liệu xong, độ dài: {char_count} ký tự")

	        segments = chunk_legal_text(raw_text)
	        segment_count = len(segments)
	        logging.info(f"✂️ Chunking xong, tổng số chunk: {segment_count}")

	        retriever.build(segments, get_embedding)
	        # Only flip the flag once the build call has returned normally.
	        ready = True
	        logging.info("✅ Xây FAISS index thành công.")
	    except Exception as err:
	        logging.error(f"❌ Lỗi khi xây index: {err}")
	        raise

	@log_timed("rescan FAISS index")
	def rescan_index():
	    """Create the retriever index if missing, otherwise refresh it.

	    When ``retriever.index`` is ``None`` this delegates to
	    ``build_index()``; an exception raised there propagates to the caller.
	    Otherwise ``data/raw_law.txt`` is re-read and re-chunked, and the chunks
	    are passed to ``retriever.rescan_and_append`` with ``get_embedding``.

	    Returns:
	        dict: ``{"status": <message>}``. Failures on the rescan path are
	        caught and reported through the status message instead of being
	        raised.
	    """
	    global ready
	    logging.info("🔍 Bắt đầu kiểm tra và cập nhật index...")
	    if retriever.index is None:
	        logging.info("⚠️ Chưa có index. Gọi build_index().")
	        build_index()
	        return {"status": "✅ Tạo mới FAISS index."}

	    try:
	        with open("data/raw_law.txt", "r", encoding="utf-8") as f:
	            text = f.read()
	        logging.info(f"📄 Đọc dữ liệu xong, độ dài: {len(text)} ký tự")
	        chunks = chunk_legal_text(text)
	        logging.info(f"🔁 Rescan: tổng số chunk mới: {len(chunks)}")
	        retriever.rescan_and_append(chunks, get_embedding)
	        # An index exists and has just been refreshed: keep the readiness
	        # flag in sync so queries are not rejected after a good rescan.
	        ready = True
	        logging.info("✅ Đã cập nhật index với các chunk mới.")
	        return {"status": "✅ Rescan & update thành công."}
	    except Exception as e:
	        # The exception is swallowed here, so record the traceback in the
	        # log; otherwise only the message text would survive.
	        logging.exception(f"❌ Lỗi khi rescan index: {e}")
	        return {"status": f"❌ Lỗi khi rescan: {e}"}

	@log_timed("trả lời câu hỏi")
	def answer_query(query: str) -> dict:
	    """Answer a question from passages retrieved out of the index.

	    Args:
	        query: The user's question.

	    Returns:
	        dict: ``{"answer": <value returned by generate_answer>}`` on
	        success, or ``{"error": <message>}`` when the index is not ready
	        or any step raises. Exceptions are not propagated.
	    """
	    logging.info(f"❓ Nhận câu hỏi: {query}")
	    if not ready:
	        logging.warning("⚠️ Index chưa sẵn sàng.")
	        return {"error": "Index chưa sẵn sàng. Vui lòng bấm 'Xây Index' hoặc gọi API /rescan."}
	    try:
	        docs = retriever.query(query, get_embedding)
	        logging.info(f"📚 Truy xuất được {len(docs)} đoạn liên quan")
	        for position, doc in enumerate(docs, start=1):
	            logging.info(f"🔍 Đoạn {position}: {doc}")

	        # Prompt layout: fixed instruction, the retrieved passages separated
	        # by blank lines, then the question and an answer cue.
	        prompt = (
	            "Bạn là một trợ lý AI có kiến thức pháp luật, hãy trả lời câu hỏi dựa trên các đoạn luật sau. "
	            "Chỉ sử dụng thông tin có trong các đoạn, không tự đoán.\n"
	        )
	        prompt += "\n\n".join(docs)
	        prompt += f"\n\nCâu hỏi: {query}\nTrả lời:"

	        logging.info("🧠 Prompt gửi đến LLM:")
	        # Only the first 1000 characters are logged to keep log lines bounded.
	        logging.info(prompt[:1000] + ("..." if len(prompt) > 1000 else ""))

	        answer = generate_answer(prompt)
	        logging.info(f"💬 Câu trả lời từ LLM: {answer}")
	        return {"answer": answer}
	    except Exception as e:
	        # The error is converted into a response, so log the traceback here
	        # or it is lost for good.
	        logging.exception(f"❌ Lỗi khi trả lời câu hỏi: {e}")
	        return {"error": str(e)}