MedChat / test /test_hybrid.py
huydt11502
Add RAG integration: Flask API server, disease selector, evaluation system with improved case generation
74b76f3
import sys
import os
# ✅ FIX PATH - QUAN TRỌNG!
sys.path.insert(0, r'D:\Storage\rag_project\src') # Thêm src vào đầu path
from data_loader import DataLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
# Smoke test: compare plain FAISS similarity search with a naive keyword scan
# over the same set of indexed chunks.
docs = DataLoader.load_all_chunks()
print(f"✅ Loaded {len(docs)} docs")

embeddings = HuggingFaceEmbeddings(model_name="bkai-foundation-models/vietnamese-bi-encoder")
vs = FAISS.from_documents(docs, embeddings)

query = "X-quang phổi 60-83% bình thường 3 tháng"

# OLD: plain FAISS similarity search, top 3 results.
old_docs = vs.similarity_search(query, k=3)
print("\n❌ OLD FAISS:")
for doc in old_docs:
    print(f" {doc.metadata['chunk_title'][:40]} | {doc.metadata['source_file']}")

# NEW: keyword boost.
keywords = ["X-quang", "phổi", "bình thường", "3 tháng"]
# The chunk text is lower-cased before matching, so the keywords must be
# lower-cased as well; otherwise a keyword containing an upper-case letter
# (e.g. "X-quang") can never match and the score is undercounted.
lowered_keywords = [kw.lower() for kw in keywords]
boosted = []
# NOTE(review): `_dict` is a private attribute of the vector store's docstore;
# it is kept here because the original script relies on it — confirm it is
# still available in the installed langchain version.
for doc in vs.docstore._dict.values():
    # Lower-case the chunk once instead of once per keyword.
    text = doc.page_content.lower()
    score = sum(1 for kw in lowered_keywords if kw in text)
    if score > 0:
        boosted.append(doc)
        print(f"\n✅ KEYWORD BOOST HIT: score={score}")
        print(f" {doc.metadata['chunk_title']}")
        print(f" Preview: {doc.page_content[:100]}...")
        # Only the first chunk containing any keyword is reported.
        break

if not boosted:
    print("\n❌ KHÔNG TÌM THẤY KEYWORD NÀO!")