"""DocuMind AI: a Gradio RAG chatbot.

Uploads text files into a persistent ChromaDB vector store, retrieves the
top-k relevant chunks for a user query, and answers with a local
DeepSeek-R1-Distill-Qwen-1.5B model.
"""

import os
import shutil

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_huggingface import HuggingFaceEmbeddings

# Enable CUDA device-side assertions for clearer error messages on GPU faults.
os.environ["TORCH_USE_CUDA_DSA"] = "1"

# Load Hugging Face model & tokenizer.
# device_map="auto" lets accelerate place the model on GPU/CPU as available,
# so downstream code must use model.device instead of assuming "cuda".
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Paths (Hugging Face Spaces uses persistent storage at /data).
CHROMA_DB_PATH = "./chroma_db"
UPLOAD_FOLDER = "./uploaded_files"

# Ensure directories exist.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(CHROMA_DB_PATH, exist_ok=True)

# Embedding function for both indexing and querying (must be the same model).
embedding_function = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# ChromaDB vector store, persistent across sessions.
vector_db = Chroma(
    persist_directory=CHROMA_DB_PATH, embedding_function=embedding_function
)

# Prompt template filled with the user query and retrieved context.
PROMPT_TEMPLATE = """
You are an expert research assistant. Use the provided context to answer the query. If unsure, state that you don't know. Be concise and factual (max 3 sentences).
Query: {user_query} Context: {document_context} Answer: """


def process_and_store_file(file_path):
    """Load a text file, split it into overlapping chunks, and index it.

    Args:
        file_path: Path to a plain-text file on local disk.
    """
    loader = TextLoader(file_path)
    raw_docs = loader.load()
    # 1000-char chunks with 200-char overlap keep sentences from being
    # split across retrieval boundaries.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    document_chunks = text_splitter.split_documents(raw_docs)
    vector_db.add_documents(document_chunks)


def find_related_documents(query):
    """Return the top-3 most similar document chunks for *query*."""
    return vector_db.similarity_search(query, k=3)


def generate_answer(question, uploaded_file=None):
    """Answer *question* using retrieved context and the local LLM.

    Args:
        question: The user's natural-language query.
        uploaded_file: Optional Gradio file value. Depending on the Gradio
            version this is either a temp-file path (str) or an object with
            a ``.name`` attribute pointing at the temp file.

    Returns:
        The model's answer as a string.
    """
    # Process file if uploaded. Gradio hands us a temp file already on disk,
    # so copy it by path rather than calling .read() (newer Gradio versions
    # pass a plain filepath string with no .read()).
    if uploaded_file is not None:
        src_path = uploaded_file if isinstance(uploaded_file, str) else uploaded_file.name
        file_path = os.path.join(UPLOAD_FOLDER, os.path.basename(src_path))
        shutil.copyfile(src_path, file_path)
        process_and_store_file(file_path)

    # Retrieve relevant chunks and assemble the context block.
    relevant_docs = find_related_documents(question)
    context = (
        "\n\n".join(doc.page_content for doc in relevant_docs)
        if relevant_docs
        else "No relevant documents found."
    )

    prompt = PROMPT_TEMPLATE.format(user_query=question, document_context=context)

    # Tokenize on the model's actual device (device_map="auto" may have
    # placed it on CPU); truncate so a large context cannot overflow the
    # model's context window.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
    # max_new_tokens (not max_length) so the prompt length doesn't eat the
    # generation budget; do_sample=True is required for temperature to apply.
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        num_return_sequences=1,
        do_sample=True,
        temperature=0.7,
    )
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer.split("Answer:")[-1].strip()


# Gradio UI.
with gr.Blocks(title="DocuMind AI - DeepSeek Qwen Chatbot") as demo:
    gr.Markdown("### 📘 DocuMind AI - Ask Questions Based on Uploaded Documents")
    with gr.Row():
        question_input = gr.Textbox(
            label="Ask a Question", placeholder="Type your question here..."
        )
        file_input = gr.File(label="Upload a Text File (Optional)", file_types=[".txt"])
    submit_btn = gr.Button("Get Answer")
    output_text = gr.Textbox(label="Answer", interactive=False)
    submit_btn.click(
        generate_answer, inputs=[question_input, file_input], outputs=output_text
    )

demo.launch()