Spaces:
Sleeping
Sleeping
import gradio as gr
import torch
import faiss
import numpy as np
import logging
import time
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader

# =====================================================
# LOGGING CONFIGURATION
# =====================================================
# Timestamped INFO-level logs so startup / request progress is visible
# in the Space's container logs.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
logger.info("Starting application...")

# =====================================================
# DEVICE CONFIG
# =====================================================
# CPU-only deployment (no GPU assumed). Capping torch's intra-op thread
# count avoids oversubscription on small shared hosts.
DEVICE = "cpu"
torch.set_num_threads(4)
# =====================================================
# LOAD EMBEDDING MODEL
# =====================================================
# Sentence-level embedder used for both document chunks and queries;
# both sides MUST use the same model for the FAISS distances to be valid.
logger.info("Loading embedding model...")
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
logger.info("Embedding model loaded.")

# =====================================================
# LOAD PHI-3 MODEL
# =====================================================
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
logger.info("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
logger.info("Loading Phi-3 model (CPU optimized)...")
# float32 on CPU (no half-precision kernels); low_cpu_mem_usage streams
# weights during load to reduce peak RAM.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True
)
model.to(DEVICE)
model.eval()
logger.info("Model loaded successfully.")

# =====================================================
# GLOBAL STORAGE
# =====================================================
# Populated by process_pdf(); read by generate_answer().
# chunks: list[str] of character-window slices of the PDF text.
# faiss_index: IndexFlatL2 over the chunk embeddings (None until a PDF
# has been processed).
chunks = []
faiss_index = None
# =====================================================
# PDF PROCESSING
# =====================================================
def process_pdf(file, chunk_size=400):
    """Extract text from an uploaded PDF, chunk it, and build a FAISS index.

    Populates the module-level ``chunks`` list and ``faiss_index`` used by
    ``generate_answer``.

    Args:
        file: Path or file-like object accepted by ``pypdf.PdfReader``
            (the Gradio ``File`` component supplies a temp file).
        chunk_size: Characters per chunk. Defaults to 400, the previously
            hard-coded size, so existing callers are unaffected.

    Returns:
        A user-facing status string (success or error).
    """
    global chunks, faiss_index
    logger.info("Processing PDF...")

    # A corrupt/password-protected upload must not crash the click handler;
    # report it to the user instead.
    try:
        reader = PdfReader(file)
    except Exception as exc:
        logger.exception("Failed to open PDF")
        return f"❌ Could not extract text from PDF."

    full_text = ""
    for page in reader.pages:
        text = page.extract_text()
        if text:
            full_text += text + "\n"
    if not full_text.strip():
        return "❌ Could not extract text from PDF."

    # Fixed-size character windows (no overlap) — simple but adequate here.
    chunks = [
        full_text[i:i + chunk_size]
        for i in range(0, len(full_text), chunk_size)
    ]
    logger.info("Created %d chunks.", len(chunks))

    # Build the index. FAISS requires float32, contiguous input;
    # SentenceTransformer already returns float32 but we cast defensively.
    embeddings = embed_model.encode(chunks, convert_to_numpy=True)
    embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)
    dimension = embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dimension)
    faiss_index.add(embeddings)
    logger.info("FAISS index built successfully.")
    return f"✅ PDF processed successfully ({len(chunks)} chunks created)."
# =====================================================
# CHAT FUNCTION
# =====================================================
def generate_answer(message, history):
    """Answer *message* using RAG over the previously indexed PDF.

    Args:
        message: The user's question (plain string).
        history: Prior chat turns from Gradio (unused by the model; the
            prompt contains only the retrieved context and the question).

    Returns:
        The assistant's answer string, or a warning if no PDF is indexed.
    """
    global chunks, faiss_index
    if faiss_index is None:
        return "⚠ Please upload and process a PDF first."
    logger.info("Received question: %s", message)
    start_time = time.time()

    # Step 1: embed the query with the SAME model used for the chunks.
    query_embedding = embed_model.encode([message], convert_to_numpy=True)

    # Step 2: retrieve top chunks. Never ask FAISS for more neighbors than
    # it holds — it pads missing results with index -1, which would make
    # chunks[i] pick the wrong entry (or a bogus one) below.
    k = min(2, faiss_index.ntotal)
    distances, indices = faiss_index.search(
        np.ascontiguousarray(query_embedding, dtype=np.float32), k=k
    )
    retrieved_context = "\n\n".join(
        chunks[i] for i in indices[0] if i >= 0
    )
    logger.info("Retrieved relevant context.")

    # Step 3: Phi-3 chat-format prompt.
    prompt = f"""
<|system|>
You are a professional AI assistant.
Provide clear, structured, intelligent answers.
Keep answers concise but informative.
If information is missing in context, say so.
<|end|>
<|user|>
Context:
{retrieved_context}
Question:
{message}
<|end|>
<|assistant|>
"""
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.5,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
            use_cache=True
        )
    # BUG FIX: the previous code decoded the WHOLE sequence with
    # skip_special_tokens=True and then split on "<|assistant|>". That
    # marker is a special token, so the decode stripped it first, the split
    # was a no-op, and the entire prompt leaked into the chat answer.
    # Decode only the newly generated tokens instead.
    prompt_len = inputs["input_ids"].shape[1]
    answer = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()

    elapsed = time.time() - start_time
    logger.info("Response generated in %.2f seconds.", elapsed)
    return answer
# =====================================================
# GRADIO UI
# =====================================================
# BUG FIX: Blocks.launch() has no `theme` parameter — passing
# gr.themes.Soft() there raises a TypeError. The theme belongs on the
# gr.Blocks(...) constructor.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Smart RAG Assistant")
    gr.Markdown("Upload a PDF and chat intelligently using Phi-3 Mini.")
    with gr.Row():
        with gr.Column(scale=1):
            pdf_file = gr.File(label="Upload PDF")
            upload_btn = gr.Button("Process PDF")
            status = gr.Markdown()
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=600)
            msg = gr.Textbox(
                placeholder="Ask something about the document..."
            )
            clear = gr.Button("Clear Chat")

    upload_btn.click(
        process_pdf,
        inputs=pdf_file,
        outputs=status
    )

    def respond(message, chat_history):
        """Append the (user, assistant) turn and clear the textbox."""
        answer = generate_answer(message, chat_history)
        chat_history.append((message, answer))
        return "", chat_history

    msg.submit(
        respond,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot]
    )
    clear.click(lambda: [], None, chatbot)

demo.launch()