Spaces:
Sleeping
Sleeping
import gradio as gr
import torch
import faiss
import numpy as np
import logging
import time
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader

# =====================================================
# LOGGING CONFIGURATION
# =====================================================
# Timestamped INFO-level logs so startup / request progress is visible
# in the Space's container logs.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
logger.info("Starting application...")

# =====================================================
# DEVICE CONFIG
# =====================================================
# CPU-only deployment (no GPU assumed). Capping torch's intra-op thread
# count avoids oversubscription on small shared hosts.
DEVICE = "cpu"
torch.set_num_threads(4)
# =====================================================
# LOAD EMBEDDING MODEL
# =====================================================
# Sentence-level embedder used for both document chunks and queries;
# both sides MUST use the same model for the FAISS distances to be valid.
logger.info("Loading embedding model...")
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
logger.info("Embedding model loaded.")

# =====================================================
# LOAD PHI-3 MODEL
# =====================================================
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
logger.info("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
logger.info("Loading Phi-3 model (CPU optimized)...")
# float32 on CPU (no half-precision kernels); low_cpu_mem_usage streams
# weights during load to reduce peak RAM.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True
)
model.to(DEVICE)
model.eval()
logger.info("Model loaded successfully.")

# =====================================================
# GLOBAL STORAGE
# =====================================================
# Populated by process_pdf(); read by generate_answer().
# chunks: list[str] of character-window slices of the PDF text.
# faiss_index: IndexFlatL2 over the chunk embeddings (None until a PDF
# has been processed).
chunks = []
faiss_index = None
# =====================================================
# PDF PROCESSING
# =====================================================
def process_pdf(file, chunk_size=400):
    """Extract text from an uploaded PDF, chunk it, and build a FAISS index.

    Populates the module-level ``chunks`` list and ``faiss_index`` used by
    ``generate_answer``.

    Args:
        file: Path or file-like object accepted by ``pypdf.PdfReader``
            (the Gradio ``File`` component supplies a temp file).
        chunk_size: Characters per chunk. Defaults to 400, the previously
            hard-coded size, so existing callers are unaffected.

    Returns:
        A user-facing status string (success or error).
    """
    global chunks, faiss_index
    logger.info("Processing PDF...")

    # A corrupt/password-protected upload must not crash the click handler;
    # report it to the user instead.
    try:
        reader = PdfReader(file)
    except Exception as exc:
        logger.exception("Failed to open PDF")
        return f"❌ Could not extract text from PDF."

    full_text = ""
    for page in reader.pages:
        text = page.extract_text()
        if text:
            full_text += text + "\n"
    if not full_text.strip():
        return "❌ Could not extract text from PDF."

    # Fixed-size character windows (no overlap) — simple but adequate here.
    chunks = [
        full_text[i:i + chunk_size]
        for i in range(0, len(full_text), chunk_size)
    ]
    logger.info("Created %d chunks.", len(chunks))

    # Build the index. FAISS requires float32, contiguous input;
    # SentenceTransformer already returns float32 but we cast defensively.
    embeddings = embed_model.encode(chunks, convert_to_numpy=True)
    embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)
    dimension = embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dimension)
    faiss_index.add(embeddings)
    logger.info("FAISS index built successfully.")
    return f"✅ PDF processed successfully ({len(chunks)} chunks created)."
# =====================================================
# CHAT FUNCTION
# =====================================================
def generate_answer(message, history):
    """Answer *message* using RAG over the previously indexed PDF.

    Args:
        message: The user's question (plain string).
        history: Prior chat turns from Gradio (unused by the model; the
            prompt contains only the retrieved context and the question).

    Returns:
        The assistant's answer string, or a warning if no PDF is indexed.
    """
    global chunks, faiss_index
    if faiss_index is None:
        return "⚠ Please upload and process a PDF first."
    logger.info("Received question: %s", message)
    start_time = time.time()

    # Step 1: embed the query with the SAME model used for the chunks.
    query_embedding = embed_model.encode([message], convert_to_numpy=True)

    # Step 2: retrieve top chunks. Never ask FAISS for more neighbors than
    # it holds — it pads missing results with index -1, which would make
    # chunks[i] pick the wrong entry (or a bogus one) below.
    k = min(2, faiss_index.ntotal)
    distances, indices = faiss_index.search(
        np.ascontiguousarray(query_embedding, dtype=np.float32), k=k
    )
    retrieved_context = "\n\n".join(
        chunks[i] for i in indices[0] if i >= 0
    )
    logger.info("Retrieved relevant context.")

    # Step 3: Phi-3 chat-format prompt.
    prompt = f"""
<|system|>
You are a professional AI assistant.
Provide clear, structured, intelligent answers.
Keep answers concise but informative.
If information is missing in context, say so.
<|end|>
<|user|>
Context:
{retrieved_context}
Question:
{message}
<|end|>
<|assistant|>
"""
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.5,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
            use_cache=True
        )
    # BUG FIX: the previous code decoded the WHOLE sequence with
    # skip_special_tokens=True and then split on "<|assistant|>". That
    # marker is a special token, so the decode stripped it first, the split
    # was a no-op, and the entire prompt leaked into the chat answer.
    # Decode only the newly generated tokens instead.
    prompt_len = inputs["input_ids"].shape[1]
    answer = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()

    elapsed = time.time() - start_time
    logger.info("Response generated in %.2f seconds.", elapsed)
    return answer
# =====================================================
# GRADIO UI
# =====================================================
# BUG FIX: Blocks.launch() has no `theme` parameter — passing
# gr.themes.Soft() there raises a TypeError. The theme belongs on the
# gr.Blocks(...) constructor.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Smart RAG Assistant")
    gr.Markdown("Upload a PDF and chat intelligently using Phi-3 Mini.")
    with gr.Row():
        with gr.Column(scale=1):
            pdf_file = gr.File(label="Upload PDF")
            upload_btn = gr.Button("Process PDF")
            status = gr.Markdown()
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=600)
            msg = gr.Textbox(
                placeholder="Ask something about the document..."
            )
            clear = gr.Button("Clear Chat")

    upload_btn.click(
        process_pdf,
        inputs=pdf_file,
        outputs=status
    )

    def respond(message, chat_history):
        """Append the (user, assistant) turn and clear the textbox."""
        answer = generate_answer(message, chat_history)
        chat_history.append((message, answer))
        return "", chat_history

    msg.submit(
        respond,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot]
    )
    clear.click(lambda: [], None, chatbot)

demo.launch()