import gradio as gr
import torch
import faiss
import numpy as np
import logging
import time
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader
# =====================================================
# LOGGING CONFIGURATION
# =====================================================
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
logger.info("Starting application...")
# =====================================================
# DEVICE CONFIG
# =====================================================
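# Inference runs on CPU; the thread cap below keeps torch from oversubscribing
# the few vCPUs a free Space provides (4 is an assumption, tune per machine).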
DEVICE = "cpu"
torch.set_num_threads(4)
# =====================================================
# LOAD EMBEDDING MODEL
# =====================================================
logger.info("Loading embedding model...")
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
logger.info("Embedding model loaded.")
# =====================================================
# LOAD PHI-3 MODEL
# =====================================================
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
logger.info("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
logger.info("Loading Phi-3 model (CPU optimized)...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True
)
model.to(DEVICE)
model.eval()
logger.info("Model loaded successfully.")
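# Phi-3-mini has roughly 3.8B parameters, so float32 weights need on the order
# of 15 GB of RAM; torch_dtype=torch.bfloat16 would roughly halve that (rough
# figures, not measured on this Space).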
# =====================================================
# GLOBAL STORAGE
# =====================================================
chunks = []
faiss_index = None
# =====================================================
# PDF PROCESSING
# =====================================================
def process_pdf(file):
    global chunks, faiss_index
    logger.info("Processing PDF...")
    reader = PdfReader(file)
    full_text = ""
    for page in reader.pages:
        text = page.extract_text()
        if text:
            full_text += text + "\n"
    if not full_text.strip():
        return "❌ Could not extract text from PDF."
    # Chunking: fixed-size 400-character slices
    chunk_size = 400
    chunks = [
        full_text[i:i + chunk_size]
        for i in range(0, len(full_text), chunk_size)
    ]
    logger.info(f"Created {len(chunks)} chunks.")
    # Embeddings (FAISS expects float32)
    embeddings = embed_model.encode(chunks, convert_to_numpy=True)
    embeddings = embeddings.astype(np.float32)
    dimension = embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dimension)
    faiss_index.add(embeddings)
    logger.info("FAISS index built successfully.")
    return f"✅ PDF processed successfully ({len(chunks)} chunks created)."
# =====================================================
# CHAT FUNCTION
# =====================================================
def generate_answer(message, history):
    global chunks, faiss_index
    if faiss_index is None:
        return "⚠ Please upload and process a PDF first."
    logger.info(f"Received question: {message}")
    start_time = time.time()
    # Step 1: Embed the query
    query_embedding = embed_model.encode([message], convert_to_numpy=True)
    query_embedding = query_embedding.astype(np.float32)
    # Step 2: Retrieve the top 2 chunks
    distances, indices = faiss_index.search(query_embedding, k=2)
    retrieved_context = "\n\n".join(chunks[i] for i in indices[0])
    logger.info("Retrieved relevant context.")
    # Step 3: Build a structured prompt in Phi-3's chat format
    prompt = f"""<|system|>
You are a professional AI assistant.
Provide clear, structured, intelligent answers.
Keep answers concise but informative.
If information is missing in the context, say so.
<|end|>
<|user|>
Context:
{retrieved_context}

Question:
{message}
<|end|>
<|assistant|>
"""
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.5,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
            use_cache=True
        )
    # Decode only the newly generated tokens; skip_special_tokens=True strips
    # the <|assistant|> marker, so splitting the full decode on it is unreliable.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    answer = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    elapsed = time.time() - start_time
    logger.info(f"Response generated in {elapsed:.2f} seconds.")
    return answer
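# Phi-3 ships a chat template, so the hand-built prompt in generate_answer
# could instead come from the tokenizer (a sketch; its output should closely
# match the manual <|system|>/<|user|> format above):
#     messages = [
#         {"role": "system", "content": "You are a professional AI assistant."},
#         {"role": "user", "content": f"Context:\n{retrieved_context}\n\nQuestion:\n{message}"},
#     ]
#     input_ids = tokenizer.apply_chat_template(
#         messages, add_generation_prompt=True, return_tensors="pt"
#     )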
# =====================================================
# GRADIO UI
# =====================================================
with gr.Blocks(theme=gr.themes.Soft()) as demo:  # theme is a Blocks option, not a launch() option
    gr.Markdown("# 🤖 Smart RAG Assistant")
    gr.Markdown("Upload a PDF and chat intelligently using Phi-3 Mini.")
    with gr.Row():
        with gr.Column(scale=1):
            pdf_file = gr.File(label="Upload PDF")
            upload_btn = gr.Button("Process PDF")
            status = gr.Markdown()
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=600)
            msg = gr.Textbox(
                placeholder="Ask something about the document..."
            )
            clear = gr.Button("Clear Chat")
    upload_btn.click(
        process_pdf,
        inputs=pdf_file,
        outputs=status
    )

    def respond(message, chat_history):
        answer = generate_answer(message, chat_history)
        chat_history.append((message, answer))
        return "", chat_history

    msg.submit(
        respond,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot]
    )
    clear.click(lambda: [], None, chatbot)

demo.launch()
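# Optional: if several users hit the Space at once, Gradio's request queue can
# serialize generation calls, e.g. demo.queue().launch() instead of the plain
# launch() above.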