File size: 5,895 Bytes
f37b2ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import os
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hugging Face access token, read from the environment (set via Spaces
# Secrets). None when the variable is unset; passed as `token=` to the
# from_pretrained calls in load_model().
HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

# Model repository details: combined into the "<user>/<name>" repo id
# that load_model() downloads from the Hugging Face Hub.
HF_USERNAME = "khysam2022"
HF_MODEL_NAME = "RAG-DSE-PAST-PAPER-2012-ICT"
MODEL_REPO = f"{HF_USERNAME}/{HF_MODEL_NAME}"

# Module-level caches, populated lazily: model/tokenizer by load_model(),
# vectorstore by process_pdf(). All start empty until first use.
model = None
tokenizer = None
vectorstore = None

def load_model():
    """Lazily load and cache the causal LM and its tokenizer.

    On first call, downloads the model referenced by ``MODEL_REPO``
    (authenticated with ``HF_TOKEN``) in fp16 with automatic device
    placement, and stores both objects in module globals so later
    calls return the cached pair immediately.

    Returns:
        tuple: ``(model, tokenizer)`` ready for generation.
    """
    global model, tokenizer

    # Reload only when either piece of the cached pair is missing.
    if model is None or tokenizer is None:
        print(f"Loading model {MODEL_REPO}...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO, token=HF_TOKEN)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_REPO,
            token=HF_TOKEN,
            torch_dtype=torch.float16,
            device_map="auto",
        )

    return model, tokenizer

def process_pdf(pdf_file):
    """Build a FAISS vectorstore from an uploaded PDF for RAG queries.

    Accepts the upload in any of the forms Gradio's File component may
    deliver: raw ``bytes``, a filepath string, or a tempfile-like object
    with a ``.name`` attribute. The PDF is split into overlapping chunks,
    embedded with MiniLM, and stored in the module-level ``vectorstore``.

    Args:
        pdf_file: The uploaded document (bytes, str path, or file-like).

    Returns:
        str: A user-facing success message with the chunk count, or an
        error message prefixed with "❌".
    """
    global vectorstore

    try:
        pdf_path = "uploaded_document.pdf"
        # Normalize the upload to a readable path on disk. Gradio may
        # supply raw bytes, a filepath string, or a tempfile-like object.
        if isinstance(pdf_file, bytes):
            with open(pdf_path, "wb") as f:
                f.write(pdf_file)
        elif isinstance(pdf_file, str):
            pdf_path = pdf_file
        elif hasattr(pdf_file, "name"):
            pdf_path = pdf_file.name
        else:
            return f"❌ Error processing PDF: unsupported upload type {type(pdf_file).__name__}"

        # Load the PDF and split it into overlapping text chunks so each
        # chunk fits comfortably in the retrieval/embedding pipeline.
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\n\n", "\n", " ", ""]
        )
        chunks = text_splitter.split_documents(documents)

        # Embed the chunks and build the similarity-search index.
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vectorstore = FAISS.from_documents(chunks, embeddings)

        # Remove the temporary copy only if we wrote it ourselves; never
        # delete a caller-owned file that was passed in by path.
        if pdf_path == "uploaded_document.pdf" and os.path.exists(pdf_path):
            os.remove(pdf_path)

        return f"✅ PDF processed successfully! Found {len(chunks)} text chunks."
    except Exception as e:
        return f"❌ Error processing PDF: {str(e)}"

def generate_answer(query):
    """Answer *query* with retrieval-augmented generation.

    Lazily loads the model on first use, retrieves the 3 most similar
    chunks from the FAISS ``vectorstore`` built by ``process_pdf``, and
    generates a completion conditioned on that context.

    Args:
        query (str): The user's question about the uploaded document.

    Returns:
        str: The generated answer, or a user-facing error/status message.
    """
    if model is None or tokenizer is None:
        try:
            load_model()
        except Exception as e:
            return f"❌ Error loading model: {str(e)}"

    if vectorstore is None:
        return "Please upload a PDF document first."

    try:
        # Retrieve the top-3 most relevant chunks and join them into one
        # context string for the prompt.
        relevant_docs = vectorstore.similarity_search(query, k=3)
        context = "\n\n".join([doc.page_content for doc in relevant_docs])

        # Create prompt with context
        prompt = f"""
You are a helpful assistant analyzing a document. Using only the provided context, answer the question.

Context:
{context}

Question: {query}

Answer:
"""

        # Truncate to the model's maximum input length so an oversized
        # retrieved context cannot overflow the context window.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)

        # Inference only: no_grad avoids building an autograd graph and
        # saves memory during generation.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=300,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                # Some causal LMs define no pad token; fall back to EOS so
                # generate() does not warn/fail.
                pad_token_id=tokenizer.eos_token_id,
            )

        # Drop the prompt tokens; return only the newly generated text.
        response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
        return response

    except Exception as e:
        return f"❌ Error generating answer: {str(e)}"

def direct_query(message):
    """Generate a reply to *message* without document retrieval.

    Lazily loads the model on first use, then generates a completion of
    a simple "User: .../Assistant: " prompt.

    Args:
        message (str): The user's chat message.

    Returns:
        str: The generated reply, or a user-facing error message.
    """
    if model is None or tokenizer is None:
        try:
            load_model()
        except Exception as e:
            return f"❌ Error loading model: {str(e)}"

    try:
        # Create prompt
        prompt = f"User: {message}\nAssistant: "

        # Truncate to the model's maximum input length so a very long
        # message cannot overflow the context window.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)

        # Inference only: no_grad avoids building an autograd graph and
        # saves memory during generation.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=300,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                # Some causal LMs define no pad token; fall back to EOS so
                # generate() does not warn/fail.
                pad_token_id=tokenizer.eos_token_id,
            )

        # Drop the prompt tokens; return only the newly generated text.
        response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
        return response

    except Exception as e:
        return f"❌ Error generating answer: {str(e)}"

# Define the Gradio interface: two tabs, one for RAG over an uploaded
# PDF and one for direct chat with the model.
with gr.Blocks() as demo:
    gr.Markdown("# RAG-DSE-PAST-PAPER-2012-ICT")
    gr.Markdown("This demo allows you to chat with the model and ask questions about uploaded documents.")
    
    with gr.Tab("RAG Query"):
        with gr.Row():
            # Left column: upload a PDF and index it via process_pdf.
            with gr.Column():
                pdf_upload = gr.File(label="Upload PDF Document")
                process_button = gr.Button("Process Document")
                status_text = gr.Textbox(label="Processing Status", interactive=False)
                
                process_button.click(process_pdf, inputs=[pdf_upload], outputs=[status_text])
            
            # Right column: ask questions answered by generate_answer
            # using the indexed document.
            with gr.Column():
                query_input = gr.Textbox(label="Your Question", placeholder="Ask a question about the document...")
                query_button = gr.Button("Ask Question")
                answer_output = gr.Textbox(label="Answer", interactive=False)
                
                query_button.click(generate_answer, inputs=[query_input], outputs=[answer_output])
    
    # Direct chat tab: messages go straight to the model via direct_query,
    # with no document retrieval involved.
    with gr.Tab("Direct Chat"):
        chat_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
        chat_button = gr.Button("Send Message")
        chat_output = gr.Textbox(label="Response", interactive=False)
        
        chat_button.click(direct_query, inputs=[chat_input], outputs=[chat_output])

# Launch the app (module-level: runs on import/execution of this script).
demo.launch()