# eval_basic.py (no-reference run)
from dotenv import load_dotenv

load_dotenv()

from src.helper import download_embeddings
from langchain_pinecone import PineconeVectorStore
from langchain_groq import ChatGroq
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from src.prompt import system_prompt

from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy
from datasets import Dataset

# 1) rebuild your RAG pipeline
embeddings = download_embeddings()
docsearch = PineconeVectorStore.from_existing_index(
    index_name="virtual-doc", embedding=embeddings
)
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 3})

llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.1)
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)
qa_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, qa_chain)

# 2) questions
questions = [
    "Why is ultrasound considered safer than x-ray imaging for the abdomen?",
    "Does achalasia typically reduce life expectancy?",
    "What is the difference between septic and sterile abscesses?",
    "Give two prevention tips to minimize acne flare-ups.",
    "What is stereotactic radiation (radiosurgery) for acoustic neuroma?",
    "What is the general prognosis for someone with a unilateral acoustic neuroma?",
    "Why can abscesses of the hand be especially serious?",
    "What is the first-line treatment for achalasia?",
    "What is the most common symptom of achalasia?",
    "I got sudden fever, what should I do?",
]

# 3) collect contexts + answers (use retriever.invoke)
rows = []
for q in questions:
    ctx_docs = retriever.invoke(q)  # <- new API (returns list[Document])
    ctx_texts = [d.page_content for d in ctx_docs]
    out = rag_chain.invoke({"input": q})
    rows.append({"question": q, "answer": out["answer"], "contexts": ctx_texts})

ds = Dataset.from_list(rows)

# 4) evaluate metrics that don't need references
result = evaluate(
    ds, metrics=[faithfulness, answer_relevancy], llm=llm, embeddings=embeddings
)

print("\n=== RAGAS SUMMARY (no-reference metrics) ===")
print(result)
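
# Note on step 3: create_retrieval_chain already returns the Documents it
# retrieved under out["context"], so the loop above runs each retrieval twice
# (once via retriever.invoke, once inside the chain). A minimal alternative
# that reuses the chain's own contexts and guarantees the scored contexts are
# exactly the ones the answer was generated from:
#
#   out = rag_chain.invoke({"input": q})
#   ctx_texts = [d.page_content for d in out["context"]]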
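
# Note on step 4 (hedged): some ragas versions expect LangChain models to be
# wrapped explicitly rather than passed raw. If evaluate() rejects the
# ChatGroq/embeddings objects above, a sketch of the wrapped form:
#
#   from ragas.llms import LangchainLLMWrapper
#   from ragas.embeddings import LangchainEmbeddingsWrapper
#
#   result = evaluate(
#       ds,
#       metrics=[faithfulness, answer_relevancy],
#       llm=LangchainLLMWrapper(llm),
#       embeddings=LangchainEmbeddingsWrapper(embeddings),
#   )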
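
# Optional follow-up (a sketch, not part of the original run): the result
# object supports to_pandas(), which yields one row per sample with a column
# per metric — handy for spotting the questions that drag the averages down.
# "ragas_scores.csv" is an arbitrary output path chosen here for illustration.
df = result.to_pandas()
print(df)
df.to_csv("ragas_scores.csv", index=False)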