huydt11502
Add RAG integration: Flask API server, disease selector, evaluation system with improved case generation
74b76f3 | import sys | |
| import os | |
| # ✅ FIX PATH - QUAN TRỌNG! | |
| sys.path.insert(0, r'D:\Storage\rag_project\src') # Thêm src vào đầu path | |
| from data_loader import DataLoader | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
# Load all chunks via the project's DataLoader and report how many came back.
docs = DataLoader.load_all_chunks()
print(f"✅ Loaded {len(docs)} docs")

# Build the embedding model, then a FAISS vector store over the loaded chunks.
EMBEDDING_MODEL = "bkai-foundation-models/vietnamese-bi-encoder"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
vs = FAISS.from_documents(documents=docs, embedding=embeddings)
# Query used to probe retrieval quality.
query = "X-quang phổi 60-83% bình thường 3 tháng"

# OLD: plain FAISS similarity search, top 3 hits, used as the baseline.
old_docs = vs.similarity_search(query, k=3)
print("\n❌ OLD FAISS:")
for hit in old_docs:
    meta = hit.metadata
    # Show the first 40 characters of the chunk title and the source file.
    print(f" {meta['chunk_title'][:40]} | {meta['source_file']}")
# NEW: keyword boost -- scan the stored chunks for literal keyword matches and
# report the first chunk that contains at least one of them.
keywords = ["X-quang", "phổi", "bình thường", "3 tháng"]
# Compare in lower case on BOTH sides: the chunk text is lower-cased below, so
# a keyword containing a capital letter ("X-quang") could otherwise never match.
lowered_keywords = [kw.lower() for kw in keywords]
boosted = []
# NOTE(review): `_dict` is a private attribute of the docstore; it is read
# directly here to iterate over every stored document.
for doc in vs.docstore._dict.values():
    text = doc.page_content.lower()  # lower-case once per chunk, not per keyword
    score = sum(1 for kw in lowered_keywords if kw in text)
    if score > 0:
        boosted.append(doc)
        print(f"\n✅ KEYWORD BOOST HIT: score={score}")
        print(f" {doc.metadata['chunk_title']}")
        print(f" Preview: {doc.page_content[:100]}...")
        break  # stop at the first matching chunk; later chunks are not scored
if not boosted:
    print("\n❌ KHÔNG TÌM THẤY KEYWORD NÀO!")