import os
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
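
# Overview: this Space wraps a small RAG pipeline around the fine-tuned model:
#   1. An uploaded PDF is split into overlapping text chunks.
#   2. The chunks are embedded with all-MiniLM-L6-v2 and indexed in a FAISS store.
#   3. For each question, the top-k chunks are retrieved and placed in the prompt.
#   4. The causal LM generates an answer conditioned on that context.
# (Assumes gradio, torch, transformers, langchain-community, faiss-cpu and
# sentence-transformers are available, e.g. via the Space's requirements.txt.)
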
# Get Hugging Face token from environment (set in Spaces Secrets)
HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
# Model repository details
HF_USERNAME = "khysam2022"
HF_MODEL_NAME = "RAG-DSE-PAST-PAPER-2012-ICT"
MODEL_REPO = f"{HF_USERNAME}/{HF_MODEL_NAME}"
# Global variables for model and vectorstore
model = None
tokenizer = None
vectorstore = None

def load_model():
    """Load the model and tokenizer"""
    global model, tokenizer
    if model is not None and tokenizer is not None:
        return model, tokenizer
    print(f"Loading model {MODEL_REPO}...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO, token=HF_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_REPO,
        token=HF_TOKEN,
        torch_dtype=torch.float16,
        device_map="auto"
    )
    return model, tokenizer
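
# Note on load_model: device_map="auto" relies on the accelerate package, and
# float16 weights assume a GPU runtime; on a CPU-only Space you would likely
# drop device_map and use torch_dtype=torch.float32 instead.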

def process_pdf(pdf_file):
    """Process a PDF for RAG"""
    global vectorstore
    if pdf_file is None:
        return "Please upload a PDF file first."
    try:
        # gr.File may hand us raw bytes, a file path, or an object exposing .name
        pdf_path = "uploaded_document.pdf"
        if isinstance(pdf_file, (bytes, bytearray)):
            with open(pdf_path, "wb") as f:
                f.write(pdf_file)
        else:
            pdf_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file
        # Load and split the PDF
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
        # Split documents into overlapping chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\n\n", "\n", " ", ""]
        )
        chunks = text_splitter.split_documents(documents)
        # Create embeddings and vectorstore
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vectorstore = FAISS.from_documents(chunks, embeddings)
        # Clean up the temporary copy if we created one
        if pdf_path == "uploaded_document.pdf" and os.path.exists(pdf_path):
            os.remove(pdf_path)
        return f"✅ PDF processed successfully! Found {len(chunks)} text chunks."
    except Exception as e:
        return f"❌ Error processing PDF: {str(e)}"

def generate_answer(query):
    """Generate answer using the model"""
    if model is None or tokenizer is None:
        try:
            load_model()
        except Exception as e:
            return f"❌ Error loading model: {str(e)}"
    if vectorstore is None:
        return "Please upload a PDF document first."
    try:
        # Retrieve relevant context
        relevant_docs = vectorstore.similarity_search(query, k=3)
        context = "\n\n".join([doc.page_content for doc in relevant_docs])
        # Create prompt with context
        prompt = f"""
You are a helpful assistant analyzing a document. Using only the provided context, answer the question.

Context:
{context}

Question: {query}

Answer:
"""
        # Generate response
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
        # Decode only the newly generated tokens and return the response
        response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
        return response
    except Exception as e:
        return f"❌ Error generating answer: {str(e)}"

def direct_query(message):
    """Direct query without RAG"""
    if model is None or tokenizer is None:
        try:
            load_model()
        except Exception as e:
            return f"❌ Error loading model: {str(e)}"
    try:
        # Create prompt
        prompt = f"User: {message}\nAssistant: "
        # Generate response
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
        # Decode only the newly generated tokens and return the response
        response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
        return response
    except Exception as e:
        return f"❌ Error generating answer: {str(e)}"

# Define Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# RAG-DSE-PAST-PAPER-2012-ICT")
    gr.Markdown("This demo allows you to chat with the model and ask questions about uploaded documents.")
    with gr.Tab("RAG Query"):
        with gr.Row():
            with gr.Column():
                pdf_upload = gr.File(label="Upload PDF Document")
                process_button = gr.Button("Process Document")
                status_text = gr.Textbox(label="Processing Status", interactive=False)
                process_button.click(process_pdf, inputs=[pdf_upload], outputs=[status_text])
            with gr.Column():
                query_input = gr.Textbox(label="Your Question", placeholder="Ask a question about the document...")
                query_button = gr.Button("Ask Question")
                answer_output = gr.Textbox(label="Answer", interactive=False)
                query_button.click(generate_answer, inputs=[query_input], outputs=[answer_output])
    with gr.Tab("Direct Chat"):
        chat_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
        chat_button = gr.Button("Send Message")
        chat_output = gr.Textbox(label="Response", interactive=False)
        chat_button.click(direct_query, inputs=[chat_input], outputs=[chat_output])

# Launch the app
demo.launch()