"""Smart RAG Assistant: upload a PDF, index it with FAISS, chat via Phi-3 Mini.

Pipeline: PDF text extraction (pypdf) -> fixed-size chunking -> MiniLM
embeddings -> FAISS L2 index -> top-k retrieval -> Phi-3 generation (CPU).
"""

import logging
import time

import faiss
import gradio as gr
import numpy as np
import torch
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM

# =====================================================
# LOGGING CONFIGURATION
# =====================================================
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
logger.info("Starting application...")

# =====================================================
# DEVICE CONFIG
# =====================================================
DEVICE = "cpu"
torch.set_num_threads(4)  # cap CPU threads to keep the host responsive

# =====================================================
# LOAD EMBEDDING MODEL
# =====================================================
logger.info("Loading embedding model...")
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
logger.info("Embedding model loaded.")

# =====================================================
# LOAD PHI-3 MODEL
# =====================================================
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"

logger.info("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

logger.info("Loading Phi-3 model (CPU optimized)...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,  # float32: Phi-3 half precision is unreliable on CPU
    low_cpu_mem_usage=True,
)
model.to(DEVICE)
model.eval()
logger.info("Model loaded successfully.")

# =====================================================
# GLOBAL STORAGE
# =====================================================
# NOTE: module-level state — fine for a single-user demo, not multi-session safe.
chunks = []        # list[str]: fixed-size text chunks of the current PDF
faiss_index = None # faiss.IndexFlatL2 over the chunk embeddings, or None


# =====================================================
# PDF PROCESSING
# =====================================================
def process_pdf(file):
    """Extract text from an uploaded PDF, chunk it, and build the FAISS index.

    Args:
        file: The uploaded file from gr.File (path-like / file object accepted
            by pypdf), or None if nothing was uploaded.

    Returns:
        A user-facing status string (success or error message).
    """
    global chunks, faiss_index

    # Guard: button can be clicked before any file is uploaded.
    if file is None:
        return "⚠ Please upload a PDF file first."

    logger.info("Processing PDF...")
    reader = PdfReader(file)

    # join() instead of repeated += — avoids quadratic string concatenation.
    page_texts = [t for page in reader.pages if (t := page.extract_text())]
    full_text = "\n".join(page_texts)

    if not full_text.strip():
        return "❌ Could not extract text from PDF."

    # Fixed-size character chunking (no overlap) — simple and fast for a demo.
    chunk_size = 400
    chunks = [
        full_text[i:i + chunk_size]
        for i in range(0, len(full_text), chunk_size)
    ]
    logger.info(f"Created {len(chunks)} chunks.")

    # Embed all chunks and index them for L2 nearest-neighbor search.
    embeddings = embed_model.encode(chunks, convert_to_numpy=True)
    dimension = embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dimension)
    faiss_index.add(embeddings)

    logger.info("FAISS index built successfully.")
    return f"✅ PDF processed successfully ({len(chunks)} chunks created)."


# =====================================================
# CHAT FUNCTION
# =====================================================
def generate_answer(message, history):
    """Answer a question using retrieved PDF context and Phi-3.

    Args:
        message: The user's question (str).
        history: Chat history from Gradio (unused; kept for interface parity).

    Returns:
        The model's answer as a string, or a warning if no PDF is indexed.
    """
    global chunks, faiss_index

    if faiss_index is None:
        return "⚠ Please upload and process a PDF first."

    logger.info(f"Received question: {message}")
    start_time = time.time()

    # Step 1: Embed the query with the same model used for the chunks.
    query_embedding = embed_model.encode([message], convert_to_numpy=True)

    # Step 2: Retrieve the top 2 most similar chunks (L2 distance).
    distances, indices = faiss_index.search(query_embedding, k=2)
    retrieved_context = "\n\n".join(chunks[i] for i in indices[0])
    logger.info("Retrieved relevant context.")

    # Step 3: Build a Phi-3 chat-format prompt with the retrieved context.
    prompt = f"""
<|system|>
You are a professional AI assistant.
Provide clear, structured, intelligent answers.
Keep answers concise but informative.
If information is missing in context, say so.
<|end|>
<|user|>
Context:
{retrieved_context}

Question: {message}
<|end|>
<|assistant|>
"""

    inputs = tokenizer(prompt, return_tensors="pt")
    input_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.5,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
            use_cache=True,
        )

    # BUGFIX: decode only the newly generated tokens. The previous
    # split("<|assistant|>") never matched because skip_special_tokens=True
    # strips that marker, so the whole prompt was echoed back to the user.
    generated_tokens = outputs[0][input_length:]
    answer = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

    elapsed = time.time() - start_time
    logger.info(f"Response generated in {elapsed:.2f} seconds.")
    return answer


# =====================================================
# GRADIO UI
# =====================================================
# BUGFIX: theme belongs to the Blocks constructor, not launch() —
# Blocks.launch() has no `theme` parameter.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Smart RAG Assistant")
    gr.Markdown("Upload a PDF and chat intelligently using Phi-3 Mini.")

    with gr.Row():
        with gr.Column(scale=1):
            pdf_file = gr.File(label="Upload PDF")
            upload_btn = gr.Button("Process PDF")
            status = gr.Markdown()

        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=600)
            msg = gr.Textbox(
                placeholder="Ask something about the document..."
            )
            clear = gr.Button("Clear Chat")

    upload_btn.click(
        process_pdf,
        inputs=pdf_file,
        outputs=status,
    )

    def respond(message, chat_history):
        """Append the (question, answer) pair to the chat and clear the box."""
        answer = generate_answer(message, chat_history)
        chat_history.append((message, answer))
        return "", chat_history

    msg.submit(
        respond,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
    )

    clear.click(lambda: [], None, chatbot)

demo.launch()