"""DocuMind AI: a Gradio RAG chatbot.

Uploads text files into a persistent ChromaDB vector store, retrieves the
top-k relevant chunks for a user query, and answers with a local
DeepSeek-R1-Distill-Qwen-1.5B model.
"""

import os
import shutil

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_huggingface import HuggingFaceEmbeddings

# Enable CUDA device-side assertions for clearer error messages on GPU faults.
os.environ["TORCH_USE_CUDA_DSA"] = "1"

# Load Hugging Face model & tokenizer.
# device_map="auto" lets accelerate place the model on GPU/CPU as available,
# so downstream code must use model.device instead of assuming "cuda".
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Paths (Hugging Face Spaces uses persistent storage at /data).
CHROMA_DB_PATH = "./chroma_db"
UPLOAD_FOLDER = "./uploaded_files"

# Ensure directories exist.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(CHROMA_DB_PATH, exist_ok=True)

# Embedding function for both indexing and querying (must be the same model).
embedding_function = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# ChromaDB vector store, persistent across sessions.
vector_db = Chroma(
    persist_directory=CHROMA_DB_PATH, embedding_function=embedding_function
)

# Prompt template filled with the user query and retrieved context.
PROMPT_TEMPLATE = """
You are an expert research assistant. Use the provided context to answer the query. If unsure, state that you don't know. Be concise and factual (max 3 sentences).
Query: {user_query} Context: {document_context} Answer: """


def process_and_store_file(file_path):
    """Load a text file, split it into overlapping chunks, and index it.

    Args:
        file_path: Path to a plain-text file on local disk.
    """
    loader = TextLoader(file_path)
    raw_docs = loader.load()
    # 1000-char chunks with 200-char overlap keep sentences from being
    # split across retrieval boundaries.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    document_chunks = text_splitter.split_documents(raw_docs)
    vector_db.add_documents(document_chunks)


def find_related_documents(query):
    """Return the top-3 most similar document chunks for *query*."""
    return vector_db.similarity_search(query, k=3)


def generate_answer(question, uploaded_file=None):
    """Answer *question* using retrieved context and the local LLM.

    Args:
        question: The user's natural-language query.
        uploaded_file: Optional Gradio file value. Depending on the Gradio
            version this is either a temp-file path (str) or an object with
            a ``.name`` attribute pointing at the temp file.

    Returns:
        The model's answer as a string.
    """
    # Process file if uploaded. Gradio hands us a temp file already on disk,
    # so copy it by path rather than calling .read() (newer Gradio versions
    # pass a plain filepath string with no .read()).
    if uploaded_file is not None:
        src_path = uploaded_file if isinstance(uploaded_file, str) else uploaded_file.name
        file_path = os.path.join(UPLOAD_FOLDER, os.path.basename(src_path))
        shutil.copyfile(src_path, file_path)
        process_and_store_file(file_path)

    # Retrieve relevant chunks and assemble the context block.
    relevant_docs = find_related_documents(question)
    context = (
        "\n\n".join(doc.page_content for doc in relevant_docs)
        if relevant_docs
        else "No relevant documents found."
    )

    prompt = PROMPT_TEMPLATE.format(user_query=question, document_context=context)

    # Tokenize on the model's actual device (device_map="auto" may have
    # placed it on CPU); truncate so a large context cannot overflow the
    # model's context window.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
    # max_new_tokens (not max_length) so the prompt length doesn't eat the
    # generation budget; do_sample=True is required for temperature to apply.
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        num_return_sequences=1,
        do_sample=True,
        temperature=0.7,
    )
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer.split("Answer:")[-1].strip()


# Gradio UI.
with gr.Blocks(title="DocuMind AI - DeepSeek Qwen Chatbot") as demo:
    gr.Markdown("### 📘 DocuMind AI - Ask Questions Based on Uploaded Documents")
    with gr.Row():
        question_input = gr.Textbox(
            label="Ask a Question", placeholder="Type your question here..."
        )
        file_input = gr.File(label="Upload a Text File (Optional)", file_types=[".txt"])
    submit_btn = gr.Button("Get Answer")
    output_text = gr.Textbox(label="Answer", interactive=False)
    submit_btn.click(
        generate_answer, inputs=[question_input, file_input], outputs=output_text
    )

demo.launch()