Hugging Face Space: RAG chatbot demo (DeepSeek-R1-Distill-Qwen-1.5B + FAISS). Space status at capture time: sleeping.
| import torch | |
| import gradio as gr | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.vectorstores import FAISS | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.document_loaders import PyPDFLoader | |
# --- Model and Tokenizer (module-level state shared by the functions below) ---

# Base LLM used for generation: 1.5B distilled Qwen variant of DeepSeek-R1.
MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Target device for the *input* tensors; the model's own placement is handled
# separately by device_map="auto" below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")

# Ensure PAD token is set correctly: many causal-LM tokenizers ship without a
# pad token, and tokenizing with padding=True requires one, so fall back to EOS.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load embedding model for RAG retrieval (MiniLM sentence embeddings).
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# FAISS index over the uploaded PDF; written by process_pdf(), read by query_rag().
vector_store = None
def process_pdf(pdf_path):
    """Load a PDF, split it into overlapping chunks, and build the FAISS index.

    Args:
        pdf_path: Filesystem path to the uploaded PDF (from a Gradio File
            component with type="filepath"). Gradio passes None when the user
            clicks the button without uploading a file.

    Returns:
        A human-readable status message for the Gradio status textbox.
    """
    global vector_store
    # Guard: without this, PyPDFLoader(None) raises and the UI shows a stack trace.
    if pdf_path is None:
        return "Please upload a PDF file first."
    try:
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
    except Exception as e:  # surface loader failures to the UI instead of crashing
        return f"Failed to read PDF: {e}"
    # 500-char chunks with 100-char overlap keep retrieval granular while
    # preserving context across chunk boundaries.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    texts = text_splitter.split_documents(documents)
    vector_store = FAISS.from_documents(texts, embedding_model)
    return "PDF successfully processed and indexed."
# RAG Query Function
def query_rag(message, temperature, max_new_tokens, top_k, repetition_penalty, top_p, system_prompt, history=None):
    """Answer a user message using retrieval-augmented generation.

    Retrieves the most similar PDF chunks from the FAISS index, builds a
    chat-formatted prompt, and generates a completion with the loaded model.

    Args:
        message: The user's question.
        temperature, max_new_tokens, top_k, repetition_penalty, top_p:
            Sampling parameters; coerced to the correct numeric types since
            Gradio may deliver them as strings.
        system_prompt: System instruction prepended to the prompt.
        history: Chat history from gr.ChatInterface; currently unused.

    Returns:
        The generated answer text (new tokens only), or an instruction to
        upload a PDF if no index exists yet.
    """
    if vector_store is None:
        return "Please upload and process a PDF first."

    # Retrieve the 3 most relevant chunks and join them into one context string.
    docs = vector_store.similarity_search(message, k=3)
    context = "\n".join(doc.page_content for doc in docs)

    # Construct a ChatML-style prompt: system + retrieved context + user turn.
    instruction = f"<|im_start|>system\n{system_prompt}\n<|im_end|>\n"
    instruction += f"Relevant context:\n{context}\n"
    instruction += f"<|im_start|>user\n{message}\n<|im_end|>\n<|im_start|>assistant\n"

    # Tokenize and move inputs to the same device the input tensors are expected on.
    enc = tokenizer(instruction, return_tensors="pt", padding=True, truncation=True)
    input_ids = enc.input_ids.to(device)
    attention_mask = enc.attention_mask.to(device)

    # Gradio sliders/textboxes can deliver strings; coerce before generate().
    temperature = float(temperature)
    max_new_tokens = int(max_new_tokens)
    top_k = int(top_k)
    repetition_penalty = float(repetition_penalty)
    top_p = float(top_p)

    output_ids = model.generate(
        input_ids,
        attention_mask=attention_mask,  # explicit mask avoids pad/eos ambiguity warnings
        do_sample=True,
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        top_p=top_p
    )
    # BUG FIX: generate() returns prompt + completion. Decoding output_ids[0]
    # in full echoed the entire system prompt, context, and question back to
    # the user. Slice off the prompt tokens and decode only the new ones.
    generated_tokens = output_ids[0][input_ids.shape[-1]:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return response
# Gradio Interface
def launch_interface():
    """Assemble the Gradio UI (PDF upload + chat) and launch the app."""
    with gr.Blocks() as demo:
        gr.Markdown("## 🤖 RAG Chatbot with DeepSeek")

        # PDF ingestion row: pick a file, process it, show the status message.
        pdf_input = gr.File(label="Upload PDF", type="filepath")
        run_button = gr.Button("Process PDF")
        status_box = gr.Textbox(label="Processing Status", interactive=False)
        run_button.click(process_pdf, inputs=[pdf_input], outputs=[status_box])

        def _chat(message, history):
            # Fixed generation settings for the demo chat.
            return query_rag(message, 0.7, 10000, 40, 1.1, 0.95, "You are a helpful assistant.", history)

        gr.ChatInterface(_chat)

    # share=True publishes a temporary public Gradio link.
    demo.launch(share=True)
# Start the app only when this file is executed directly (not on import).
if __name__ == "__main__":
    launch_interface()