import gradio as gr import uuid import sqlite3 import json import re import PyPDF2 import numpy as np from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM from sklearn.metrics.pairwise import cosine_similarity import os from database1 import create_db from first1 import pdf_query from ans_generator1 import AnswerGenerator import sqlite3, json from q_generator1 import QGenerator from transformers import pipeline # Initialize models qgen = QGenerator() ansgen = AnswerGenerator() tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base", use_fast=False) model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base") qa_model = pipeline("text2text-generation", model=model, tokenizer=tokenizer) # ā Upload and process PDF def upload_pdf(files): try: messages = [] for file in files: filename = os.path.basename(file.name) # <-- FIXED HERE token = str(uuid.uuid4()) pdf_reader = PyPDF2.PdfReader(file) text = "".join([page.extract_text() or "" for page in pdf_reader.pages]) chunks = [text[i:i + 500] for i in range(0, len(text), 500)] create_db(token, chunks, filename) messages.append(f"ā File Name: {filename}\nš Token: {token}") return "\n\n".join(messages) except Exception as e: return f"ā Error: {str(e)}" # ā Generate Questions & Answers def generate_qa(token): try: if not token: return "ā ļø Please provide a token." print("š„ Received Token:", token) row=create_db.fetch_by_token_or_filename(token) if not row: return "ā No data found for this token." chunks = json.loads(row['chunk_data']) qa_pairs = [] for i, chunk in enumerate(chunks): print(f"\nš¹ Processing chunk {i+1}/{len(chunks)}") questions = qgen.generate(chunk) print(f"š§ Questions generated: {questions}") if not questions: continue for question in questions[:1]: prompt = f"Context: {chunk}\n\nQuestion: {question}\n\nAnswer:" try: result = qa_model(prompt, max_length=256, do_sample=False) if isinstance(result, list) and "generated_text" in result[0]: answer = result[0]["generated_text"].strip() else: answer = "N/A" qa_pairs.append(f"Q: {question}\nA: {answer}") except Exception as e: continue return "\n\n".join(qa_pairs) if qa_pairs else "ā ļø No Q&A pairs generated." except Exception as e: return f"ā Error: {str(e)}" # ā Ask a question using token def ask_question(token, question): try: row=create_db.fetch_by_token_or_filename(token) if not row: return "ā Token not found." chunks = json.loads(row['chunk_data']) processor = pdf_query() model = processor.model clean_chunks = [re.sub(r'\s+', ' ', c.strip()) for c in chunks if c.strip()] if not clean_chunks: return "ā ļø No valid content found in PDF." chunk_embeddings = model.encode(clean_chunks) q_embedding = model.encode([question]) scores = cosine_similarity(q_embedding, chunk_embeddings)[0] top_index = int(np.argmax(scores)) top_score = float(scores[top_index]) best_text = clean_chunks[top_index] return f"Q: {question}\nA: {best_text}\nScore: {round(top_score, 3)}" except Exception as e: return f"ā Error: {str(e)}" # ā Gradio UI with gr.Blocks(theme="default") as demo: gr.Markdown( """
Upload your PDFs, generate smart questions, and get intelligent answers.