import chromadb
import os
import time
import transformers

from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT
from langchain_chroma import Chroma
from langchain_community.llms import CTransformers
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import ChatOllama
from transformers import pipeline

client = chromadb.HttpClient(host="localhost", port=8000)


def using_ollama_model(retriever, query, results, conversation_history):
    # Flatten the prior turns into a plain-text transcript for the prompt
    history_text = ""
    for item in conversation_history:
        if "question" in item and item["question"]:
            history_text += f"User: {item['question']}\n"
        if "answer" in item and item["answer"]:
            history_text += f"Assistant: {item['answer']}\n"

    print("<<<<<< LLM MODEL STARTED >>>>>>")
    print(" ========>", history_text)

    # Ensure the prompt template is well-structured
    prompt_template = """
You are a helpful assistant. Answer the following question using the provided context and previous conversation history.
If the context does not contain the answer, only then reply with: "Sorry, I don't have enough information."

Conversation History: {history}

Context: {results}

Question: {query}
"""

    # Initialize the PromptTemplate
    template = PromptTemplate(
        input_variables=["history", "results", "query"],
        template=prompt_template,
    )

    doc_texts = "\n".join([doc.page_content for doc in results])
    formatted_output = template.format(history=history_text, results=doc_texts, query=query)
    print("<<<<<<<<<<< Formatted Output >>>>>>>>>>>")
    print(formatted_output)
    print("type of formatted output is", type(formatted_output))

    llm = ChatOllama(model="llama3.2", temperature=0.4, num_predict=512)

    rag_chain = template | llm | StrOutputParser()

    answer = rag_chain.invoke({"history": history_text, "results": doc_texts, "query": query})
    return answer


# # Set up the RAG pipeline
# rag_pipeline = RetrievalQAWithSourcesChain.from_chain_type(
#     llm=llm, chain_type="stuff", retriever=retriever
# )
#
# try:
#     # answer = rag_pipeline.run(formatted_output)
#     answer = rag_pipeline.invoke(formatted_output)
#     return answer
# except Exception as e:
#     print(f"Error occurred during invocation: {e}")
#     return None


def retrievingReponse(docId, query, conversation_history):
    model_kwargs = {"device": "mps"}
    encode_kwargs = {"normalize_embeddings": True}
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-distilroberta-base-v1",
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
    )

    vectorDB = Chroma(
        collection_name="embeddings",
        embedding_function=embeddings,  # Used to embed the query at retrieval time
        persist_directory="MM_CHROMA_DB",
    )

    # retriever = vectorDB.as_retriever(
    #     search_type="mmr",
    #     search_kwargs={
    #         "k": 6,  # was 5 originally
    #         "lambda_mult": 1,  # was 0.30 originally
    #         "filter": {"docId": docId},
    #     },
    # )
    retriever = vectorDB.as_retriever(
        search_type="similarity",
        search_kwargs={
            "k": 4,  # was 5 originally
            # "lambda_mult": 1,  # was 0.30 originally
            "filter": {"docId": docId},
        },
    )
    # retriever = vectorDB.as_retriever()

    print("<<<<<<<<<<<<<<<< Retriever >>>>>>>>>>>>>>>>")
    print("\n")

    results = retriever.invoke(query)

    # Deduplicate the retrieved chunks by their raw text
    unique_results = []
    seen_texts = set()
    for result in results:
        print(result)
        # If the result's content has not been seen before, process it
        if result.page_content not in seen_texts:
            ans = result.page_content
            ans = ans.replace("\n", "")  # Clean the content by removing newlines
            unique_results.append(ans)  # Add the cleaned answer to the results list
            seen_texts.add(result.page_content)  # Mark this text as seen

    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    start = time.time()
    # llm_result = using_llm_model(retriever, query, results)
    llm_result = using_ollama_model(retriever, query, results, conversation_history)
    end = time.time()
    print("Inference Time:>>>>>>>", end - start)

    return llm_result
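

# Minimal usage sketch. The docId, query, and conversation history below are
# hypothetical placeholders: they assume a Chroma server is reachable on
# localhost:8000 and that a document was already embedded into the
# "embeddings" collection under docId "doc-123". Swap in real values before running.
if __name__ == "__main__":
    sample_history = [
        {"question": "What is this document about?", "answer": "It summarizes the Q3 sales report."},
    ]
    final_answer = retrievingReponse(
        docId="doc-123",
        query="Which region had the highest revenue?",
        conversation_history=sample_history,
    )
    print("Final answer:", final_answer)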