File size: 5,447 Bytes
b183b9a
 
 
 
 
 
 
 
 
7d9290b
b183b9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d9290b
b183b9a
 
 
 
 
 
878db1c
9a5d0df
b183b9a
 
 
 
 
 
 
 
 
 
 
 
 
 
ef5c1a2
b183b9a
 
 
 
26e031f
b183b9a
 
 
 
 
 
 
 
 
 
0e93d89
b183b9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48c487d
 
b183b9a
 
 
 
48c487d
b183b9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a5d0df
 
30f87cd
9a5d0df
b183b9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import gradio as gr
import uuid
import sqlite3
import json
import re
import PyPDF2
import numpy as np
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from sklearn.metrics.pairwise import cosine_similarity
import os
from database1 import create_db
from first1 import pdf_query

from ans_generator1 import AnswerGenerator

import sqlite3, json
from q_generator1 import QGenerator
from transformers import pipeline
# Initialize models
# Project-local question/answer generators (see q_generator1 / ans_generator1).
qgen = QGenerator()
ansgen = AnswerGenerator()
# use_fast=False forces the slow (pure-Python) tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base", use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
# text2text pipeline used by generate_qa() to answer the generated questions.
qa_model = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

# ✅ Upload and process PDF
def upload_pdf(files):
    """Extract text from each uploaded PDF, chunk it, and persist it.

    Parameters:
        files: list of Gradio file objects (each exposes a ``.name`` path).

    Returns:
        A user-facing status string with one "filename + token" entry per
        file, or an error message on failure.
    """
    try:
        messages = []

        for file in files:
            filename = os.path.basename(file.name)
            token = str(uuid.uuid4())  # unique key for later retrieval

            pdf_reader = PyPDF2.PdfReader(file)
            # extract_text() may return None (e.g. image-only pages) — coalesce to "".
            text = "".join([page.extract_text() or "" for page in pdf_reader.pages])
            # Fixed-size 500-character chunks, the granularity generate_qa()
            # and ask_question() read back from the database.
            chunks = [text[i:i + 500] for i in range(0, len(text), 500)]

            create_db(token, chunks, filename)
            # Bug fix: filename was computed but never shown — the message
            # previously contained a literal placeholder instead of the name.
            messages.append(f"✅ File Name: {filename}\n🔑 Token: {token}")

        return "\n\n".join(messages)

    except Exception as e:
        return f"❌ Error: {str(e)}"

# ✅ Generate Questions & Answers
def generate_qa(token):
    """Generate one Q&A pair per stored chunk of the PDF identified by *token*.

    Parameters:
        token: the UUID (or filename) issued at upload time.

    Returns:
        A newline-separated string of "Q: ...\nA: ..." pairs, or a
        user-facing warning/error message.
    """
    try:
        if not token:
            return "⚠️ Please provide a token."

        print("📥 Received Token:", token)

        # NOTE(review): fetch_by_token_or_filename is accessed as an attribute
        # of the imported create_db function — confirm database1 exposes it
        # this way rather than as a module-level function.
        row = create_db.fetch_by_token_or_filename(token)

        if not row:
            return "❌ No data found for this token."

        chunks = json.loads(row['chunk_data'])
        qa_pairs = []

        for i, chunk in enumerate(chunks):
            print(f"\n🔹 Processing chunk {i+1}/{len(chunks)}")
            questions = qgen.generate(chunk)
            print(f"🧠 Questions generated: {questions}")

            if not questions:
                continue

            # Only the first generated question per chunk is answered,
            # keeping inference time bounded.
            for question in questions[:1]:
                prompt = f"Context: {chunk}\n\nQuestion: {question}\n\nAnswer:"
                try:
                    result = qa_model(prompt, max_length=256, do_sample=False)
                    if isinstance(result, list) and "generated_text" in result[0]:
                        answer = result[0]["generated_text"].strip()
                    else:
                        answer = "N/A"

                    qa_pairs.append(f"Q: {question}\nA: {answer}")

                except Exception:
                    # Best-effort: skip questions whose generation fails
                    # (previously bound the exception to an unused variable).
                    continue

        return "\n\n".join(qa_pairs) if qa_pairs else "⚠️ No Q&A pairs generated."

    except Exception as e:
        return f"❌ Error: {str(e)}"

# ✅ Ask a question using token
def ask_question(token, question):
    """Answer *question* by returning the stored PDF chunk most similar to it.

    Looks up the chunks saved under *token*, embeds them alongside the
    question, and returns the best cosine-similarity match with its score.
    """
    try:
        row = create_db.fetch_by_token_or_filename(token)

        if not row:
            return "❌ Token not found."

        raw_chunks = json.loads(row['chunk_data'])
        encoder = pdf_query().model

        # Collapse runs of whitespace and drop empty chunks before embedding.
        candidates = [
            re.sub(r'\s+', ' ', chunk.strip())
            for chunk in raw_chunks
            if chunk.strip()
        ]
        if not candidates:
            return "⚠️ No valid content found in PDF."

        similarities = cosine_similarity(
            encoder.encode([question]),
            encoder.encode(candidates),
        )[0]

        best = int(np.argmax(similarities))
        best_score = float(similarities[best])

        return f"Q: {question}\nA: {candidates[best]}\nScore: {round(best_score, 3)}"

    except Exception as e:
        return f"❌ Error: {str(e)}"

# ✅ Gradio UI
# Three-tab workflow: upload PDFs -> generate Q&A from a token -> free-form Q&A.
with gr.Blocks(theme="default") as demo:
    gr.Markdown(
        """
        <div style='text-align: center; padding: 1rem;'>
            <h1 style='color: #3b82f6;'>📄 AI-Powered PDF Q&A System</h1>
            <p style='font-size: 1.1rem;'>Upload your PDFs, generate smart questions, and get intelligent answers.</p>
        </div>
        """
    )

    with gr.Tab("📤 1. Upload PDF"):
        gr.Markdown("### 🗂 Upload a PDF File")
        file = gr.File(label="Upload one or more PDFs", file_types=[".pdf"], file_count="multiple")
        upload_out = gr.Markdown() 
        # Processing is triggered automatically on file selection (no button).
        file.change(fn=upload_pdf, inputs=file, outputs=upload_out)

    with gr.Tab("🧠 2. Generate Questions & Answers"):
        gr.Markdown("### 🤖 Generate Questions and Answers from Uploaded PDF")
        token_input = gr.Textbox(label="🔑 Enter Received Token", placeholder="e.g., 123e4567-e89b-12d3-a456...")
        output_box = gr.Textbox(label="📝 Generated Q&A", lines=15, interactive=False)
        gr.Button("🚀 Generate Q&A").click(fn=generate_qa, inputs=token_input, outputs=output_box)

    with gr.Tab("❓ 3. Ask a Question"):
        gr.Markdown("### 💬 Ask a question based on uploaded PDF")
        token_box = gr.Textbox(label="Token ID", placeholder="e.g., 123e4567-e89b-12d3-a456...")
        question_box = gr.Textbox(label="Type your question", placeholder="What is the main topic discussed?")
        answer_result = gr.Textbox(label="Answer Output", lines=6, interactive=False)
        gr.Button("🎯 Get Answer").click(fn=ask_question, inputs=[token_box, question_box], outputs=answer_result)

# Bind to all interfaces on port 7860 (standard Gradio port; typical for
# containerized deployments).
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)