# DeepseekRAG / app.py
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
# Note: these are the pre-0.2 LangChain import paths; on LangChain >= 0.2 the
# embeddings, vector store, and document loader live in langchain_community,
# and the text splitter in langchain_text_splitters.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
# Model and Tokenizer
MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# device_map="auto" places the model on GPU when available, otherwise CPU;
# inputs are later moved to model.device to match this placement
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")
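
# Optional memory saving (a sketch, not part of the original app): torch_dtype is a
# standard from_pretrained argument, and half precision roughly halves GPU memory
# use for this 1.5B model. The helper name load_model_fp16 is illustrative.
def load_model_fp16():
    return AutoModelForCausalLM.from_pretrained(
        MODEL_ID, device_map="auto", torch_dtype=torch.float16
    )
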
# Ensure a PAD token is set (required for padding during tokenization)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Load embedding model for RAG
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = None
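
# Optional persistence helpers (a sketch, not wired into the UI): save/load the
# FAISS index so a processed PDF survives restarts. The "faiss_index" folder name
# is an illustrative choice; save_local/load_local are LangChain's FAISS APIs.
# The allow_dangerous_deserialization flag exists only on newer LangChain
# releases and should be dropped on older ones.
def save_index(path="faiss_index"):
    if vector_store is not None:
        vector_store.save_local(path)

def load_index(path="faiss_index"):
    global vector_store
    vector_store = FAISS.load_local(
        path, embedding_model, allow_dangerous_deserialization=True
    )
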
# Function to process a PDF and create the vector database
def process_pdf(pdf_path):
    global vector_store
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()
    # Split pages into overlapping chunks for retrieval
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    texts = text_splitter.split_documents(documents)
    vector_store = FAISS.from_documents(texts, embedding_model)
    return "PDF successfully processed and indexed."
# RAG query function: retrieve relevant chunks, build a prompt around them, and generate an answer
def query_rag(message, temperature, max_new_tokens, top_k, repetition_penalty, top_p, system_prompt, history=None):
    if vector_store is None:
        return "Please upload and process a PDF first."

    # Retrieve the most relevant chunks for the query
    docs = vector_store.similarity_search(message, k=3)
    context = "\n".join(doc.page_content for doc in docs)

    # Construct a ChatML-style prompt; the retrieved context is kept inside the
    # system block rather than stranded between messages
    instruction = f"<|im_start|>system\n{system_prompt}\nRelevant context:\n{context}\n<|im_end|>\n"
    instruction += f"<|im_start|>user\n{message}\n<|im_end|>\n<|im_start|>assistant\n"

    # Tokenize; model.device matches wherever device_map placed the weights
    enc = tokenizer(instruction, return_tensors="pt", padding=True, truncation=True)
    input_ids = enc.input_ids.to(model.device)
    attention_mask = enc.attention_mask.to(model.device)

    # Coerce Gradio inputs to the types generate() expects
    temperature = float(temperature)
    max_new_tokens = int(max_new_tokens)
    top_k = int(top_k)
    repetition_penalty = float(repetition_penalty)
    top_p = float(top_p)

    # Generate the response
    output_ids = model.generate(
        input_ids,
        attention_mask=attention_mask,  # explicit mask avoids pad/eos ambiguity
        do_sample=True,
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        top_p=top_p,
    )

    # Decode only the newly generated tokens, not the echoed prompt
    response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
    return response
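
# Alternative prompt construction (a sketch): apply_chat_template builds the prompt
# from the chat template shipped with the model instead of hand-written ChatML
# markers, which is safer if the model's template differs from the <|im_start|>
# style assumed above. build_prompt is an illustrative helper name.
def build_prompt(system_prompt, context, message):
    messages = [
        {"role": "system", "content": f"{system_prompt}\nRelevant context:\n{context}"},
        {"role": "user", "content": message},
    ]
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
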
# Gradio Interface
def launch_interface():
    with gr.Blocks() as demo:
        gr.Markdown("## 🤖 RAG Chatbot with DeepSeek")

        pdf_uploader = gr.File(label="Upload PDF", type="filepath")
        process_btn = gr.Button("Process PDF")
        process_output = gr.Textbox(label="Processing Status", interactive=False)
        process_btn.click(process_pdf, inputs=[pdf_uploader], outputs=[process_output])

        # Chat interface with fixed generation settings; the large max_new_tokens
        # budget leaves room for the model's long reasoning traces
        gr.ChatInterface(
            lambda message, history: query_rag(
                message, 0.7, 10000, 40, 1.1, 0.95,
                "You are a helpful assistant.", history,
            )
        )

    demo.launch(share=True)  # enable a public share link
if __name__ == "__main__":
    launch_interface()
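
# Expected dependencies (inferred from the imports above; versions are not pinned
# in this file): torch, transformers, gradio, langchain, faiss-cpu (or faiss-gpu),
# pypdf, sentence-transformers.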