"""Gradio app: chat with a fine-tuned causal LM, optionally grounded (RAG)
in an uploaded PDF via FAISS similarity search.

Intended to run as ``app.py`` on a Hugging Face Space.
"""

import os

import gradio as gr
import torch
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModelForCausalLM

# Get Hugging Face token from environment (set in Spaces Secrets)
HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

# Model repository details
HF_USERNAME = "khysam2022"
HF_MODEL_NAME = "RAG-DSE-PAST-PAPER-2012-ICT"
MODEL_REPO = f"{HF_USERNAME}/{HF_MODEL_NAME}"

# Lazily-initialized globals shared by the Gradio callbacks.
model = None
tokenizer = None
vectorstore = None


def load_model():
    """Load the model and tokenizer, caching them in module globals.

    Returns:
        tuple: ``(model, tokenizer)``. Subsequent calls return the cached pair.
    """
    global model, tokenizer
    if model is not None and tokenizer is not None:
        return model, tokenizer
    print(f"Loading model {MODEL_REPO}...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO, token=HF_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_REPO,
        token=HF_TOKEN,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    return model, tokenizer


def process_pdf(pdf_file):
    """Index an uploaded PDF into a FAISS vectorstore for RAG queries.

    Args:
        pdf_file: Value delivered by ``gr.File`` — depending on Gradio
            version/config this is a filepath string, a tempfile-like object
            exposing ``.name``, or raw bytes. All three are handled; the
            original code assumed bytes and would raise ``TypeError`` for the
            common filepath/tempfile cases.

    Returns:
        str: A status message for the UI (success with chunk count, or error).
    """
    global vectorstore
    try:
        # Resolve a readable path; only write a temp copy if given raw bytes.
        temp_path = None
        if isinstance(pdf_file, str):
            pdf_path = pdf_file
        elif hasattr(pdf_file, "name"):
            pdf_path = pdf_file.name
        else:
            temp_path = pdf_path = "uploaded_document.pdf"
            with open(temp_path, "wb") as f:
                f.write(pdf_file)

        # Load and split the PDF into overlapping chunks for retrieval.
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\n\n", "\n", " ", ""],
        )
        chunks = text_splitter.split_documents(documents)

        # Embed chunks and build the in-memory FAISS index.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        vectorstore = FAISS.from_documents(chunks, embeddings)

        # Clean up only the copy we wrote ourselves — never a path Gradio owns.
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)

        return f"✅ PDF processed successfully! Found {len(chunks)} text chunks."
    except Exception as e:
        return f"❌ Error processing PDF: {str(e)}"


def _generate(prompt):
    """Run the loaded model on *prompt*, returning only newly generated text.

    Shared by :func:`generate_answer` and :func:`direct_query`, which
    previously duplicated this tokenize/generate/decode sequence.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=300,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    # Slice off the prompt tokens so only the continuation is decoded.
    return tokenizer.decode(
        outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True
    )


def generate_answer(query):
    """Answer *query* using context retrieved from the indexed PDF.

    Returns:
        str: The model's answer, or an error/instruction message.
    """
    if model is None or tokenizer is None:
        try:
            load_model()
        except Exception as e:
            return f"❌ Error loading model: {str(e)}"
    if vectorstore is None:
        return "Please upload a PDF document first."
    try:
        # Retrieve the 3 most similar chunks and join them as context.
        relevant_docs = vectorstore.similarity_search(query, k=3)
        context = "\n\n".join([doc.page_content for doc in relevant_docs])

        prompt = f"""
You are a helpful assistant analyzing a document. Using only the provided context, answer the question.

Context:
{context}

Question: {query}

Answer: """
        return _generate(prompt)
    except Exception as e:
        return f"❌ Error generating answer: {str(e)}"


def direct_query(message):
    """Chat with the model directly, without document retrieval.

    Returns:
        str: The model's response, or an error message.
    """
    if model is None or tokenizer is None:
        try:
            load_model()
        except Exception as e:
            return f"❌ Error loading model: {str(e)}"
    try:
        prompt = f"User: {message}\nAssistant: "
        return _generate(prompt)
    except Exception as e:
        return f"❌ Error generating answer: {str(e)}"


# Define Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# RAG-DSE-PAST-PAPER-2012-ICT")
    gr.Markdown(
        "This demo allows you to chat with the model and ask questions about uploaded documents."
    )

    with gr.Tab("RAG Query"):
        with gr.Row():
            with gr.Column():
                pdf_upload = gr.File(label="Upload PDF Document")
                process_button = gr.Button("Process Document")
                status_text = gr.Textbox(label="Processing Status", interactive=False)
                process_button.click(
                    process_pdf, inputs=[pdf_upload], outputs=[status_text]
                )
            with gr.Column():
                query_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask a question about the document...",
                )
                query_button = gr.Button("Ask Question")
                answer_output = gr.Textbox(label="Answer", interactive=False)
                query_button.click(
                    generate_answer, inputs=[query_input], outputs=[answer_output]
                )

    with gr.Tab("Direct Chat"):
        chat_input = gr.Textbox(
            label="Your Message", placeholder="Type your message here..."
        )
        chat_button = gr.Button("Send Message")
        chat_output = gr.Textbox(label="Response", interactive=False)
        chat_button.click(direct_query, inputs=[chat_input], outputs=[chat_output])


# Launch only when executed as a script (how Spaces runs app.py),
# so importing this module for testing does not start a server.
if __name__ == "__main__":
    demo.launch()