Spaces:
Sleeping
Sleeping
| import sys | |
| import os | |
| from dotenv import load_dotenv | |
| root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')) | |
| sys.path.append(root_dir) | |
| from langchain_community.cross_encoders import HuggingFaceCrossEncoder | |
| from langchain_core.runnables import RunnablePassthrough | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_groq import ChatGroq | |
| from langchain_openai import ChatOpenAI | |
| from langchain_classic.retrievers.document_compressors import CrossEncoderReranker | |
| from langchain_core.messages import SystemMessage | |
| from langchain_classic.retrievers import ContextualCompressionRetriever, MultiQueryRetriever | |
| from src.retrieval.vector_store import get_vector_store | |
| from src.retrieval.retriever import get_retriever | |
| from src.chains.prompt import get_rag_prompt | |
| load_dotenv() | |
def create_rag_chain(disease_label=None):
    """Build the retrieval-augmented generation (RAG) chain.

    The chain is assembled from these stages, in order:

    1. A similarity retriever over the project vector store (``k=5``),
       with ``disease_label`` forwarded as ``filter_label``.
    2. A ``MultiQueryRetriever`` wrapping that retriever, driven by the
       same Groq chat model used for answering.
    3. A cross-encoder re-ranker that keeps the top 3 documents.
    4. The project RAG prompt, the Groq chat model, and a string parser.

    Args:
        disease_label: Value passed to ``get_retriever`` as
            ``filter_label``. Defaults to ``None``, which is forwarded
            unchanged.

    Returns:
        A runnable chain. Its input is fed both to the re-ranking
        retriever (to build ``context``) and, unchanged, to the prompt's
        ``input`` variable; the model output is parsed by
        ``StrOutputParser``.
    """
    # Single source of truth for the re-ranker settings, so the log line and
    # the debug header below always describe what is actually configured.
    # NOTE(review): the previous log message named "BAAI/bge-reranker-v2-m3"
    # while the code loaded "BAAI/bge-reranker-base". The loaded model is kept
    # here; change this value if v2-m3 was the intended model.
    reranker_model_name = "BAAI/bge-reranker-base"
    reranker_top_n = 3

    llm = ChatGroq(
        model="openai/gpt-oss-20b",
        streaming=True,
        temperature=0.2,
        api_key=os.getenv("GROQ_API_KEY"),
    )

    vs = get_vector_store()
    base_retriever = get_retriever(
        vector_store=vs,
        search_type="similarity",
        k=5,
        filter_label=disease_label,
    )

    # The multi-query retriever uses the LLM together with the base retriever.
    mq_retriever = MultiQueryRetriever.from_llm(
        retriever=base_retriever,
        llm=llm,
    )

    print(f"Memuat model Re-ranker ({reranker_model_name})...")
    cross_encoder = HuggingFaceCrossEncoder(model_name=reranker_model_name)

    # Keep only the most relevant documents after re-ranking.
    compressor = CrossEncoderReranker(model=cross_encoder, top_n=reranker_top_n)

    # Wrap the multi-query retriever so its results pass through the re-ranker.
    rerank_retriever = ContextualCompressionRetriever(
        base_compressor=compressor,
        base_retriever=mq_retriever,
    )

    prompt = get_rag_prompt()

    def format_docs(docs):
        """Print a debug summary of ``docs`` and return their joined text."""
        print("\n" + "=" * 50)
        print(f"🎯 [DEBUG] {reranker_top_n} DOKUMEN TERBAIK SETELAH DI-RERANK:")
        for position, doc in enumerate(docs, start=1):
            # Prefer the 'label' metadata, then 'source', then a placeholder.
            sumber = (
                doc.metadata.get('label')
                or doc.metadata.get('source')
                or 'Sumber tidak diketahui'
            )
            # NOTE(review): confirm the re-ranker actually stores
            # 'relevance_score' in metadata; otherwise this always shows N/A.
            skor = doc.metadata.get('relevance_score', 'N/A')
            print(f" [{position}] Topik: {sumber} | Skor Relevansi: {skor}")
        print("=" * 50 + "\n")
        # Join the final document texts into one context string.
        return "\n\n".join(doc.page_content for doc in docs)

    rag_chain = (
        # The re-ranking retriever is the source of the prompt context.
        {"context": rerank_retriever | format_docs, "input": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    return rag_chain