deepkansara-123 committed on
Commit
b183b9a
·
verified ·
1 Parent(s): cb90daa

Update app.py

Browse files

UI: token and file name in row

Files changed (1) hide show
  1. app.py +158 -194
app.py CHANGED
@@ -1,194 +1,158 @@
1
- import gradio as gr
2
- import uuid
3
- import sqlite3
4
- import json
5
- import re
6
- import PyPDF2
7
- import numpy as np
8
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
9
- from sklearn.metrics.pairwise import cosine_similarity
10
-
11
- # Local imports
12
- from database1 import create_db
13
- from first1 import pdf_query
14
-
15
- from ans_generator1 import AnswerGenerator
16
-
17
- import sqlite3, json
18
- from q_generator1 import QGenerator
19
- from transformers import pipeline
20
- # Initialize models
21
- qgen = QGenerator()
22
- ansgen = AnswerGenerator()
23
-
24
- # Load FLAN-T5 model
25
- tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base", use_fast=False)
26
- model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
27
- qa_model = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
28
-
29
-
30
- # Upload and process PDF
31
- # ✅ Updated version – supports multiple PDF files
32
- def upload_pdf(files):
33
- try:
34
- messages = []
35
-
36
- for file in files:
37
- filename = file.name
38
- token = str(uuid.uuid4())
39
-
40
- pdf_reader = PyPDF2.PdfReader(file)
41
- text = "".join([page.extract_text() or "" for page in pdf_reader.pages])
42
- chunks = [text[i:i + 500] for i in range(0, len(text), 500)]
43
-
44
- create_db(token, chunks, filename, text)
45
- messages.append(f"✅ Uploaded and stored: {filename} (Token: {token})")
46
-
47
- return "\n".join(messages)
48
-
49
- except Exception as e:
50
- return f"❌ Error: {str(e)}"
51
-
52
-
53
-
54
- # Load QG and QA once
55
- qgen = QGenerator()
56
- qa_model = pipeline("text2text-generation", model="google/flan-t5-base")
57
- def generate_qa(token):
58
- try:
59
- if not token:
60
- return "⚠️ Please provide a token."
61
-
62
- print("📥 Received Token:", token)
63
-
64
- # Load chunk_data using token
65
- with sqlite3.connect("my_database.db") as conn:
66
- cursor = conn.cursor()
67
- cursor.execute("SELECT chunk_data FROM token_data WHERE token_id = ?", (token,))
68
- row = cursor.fetchone()
69
-
70
- if not row:
71
- print("❌ No data found for token in DB.")
72
- return "❌ No data found for this token."
73
-
74
- chunks = json.loads(row[0])
75
- if not chunks:
76
- print("⚠️ Chunk data is empty.")
77
- return "⚠️ No content available in database for this PDF."
78
-
79
- qa_pairs = []
80
-
81
- for i, chunk in enumerate(chunks):
82
- print(f"\n🔹 Processing chunk {i+1}/{len(chunks)}")
83
- questions = qgen.generate(chunk)
84
- print(f"🧠 Questions generated: {questions}")
85
-
86
- if not questions:
87
- print("⚠️ No questions generated for this chunk.")
88
- continue
89
-
90
- for question in questions[:2]: # Max 2 Qs per chunk
91
- prompt = f"Context: {chunk}\n\nQuestion: {question}\n\nAnswer:"
92
- print(f"➡️ Prompt:\n{prompt}")
93
-
94
- try:
95
- result = qa_model(prompt, max_length=256, do_sample=False)
96
- print(f"⬅️ Raw model output: {result}")
97
-
98
- if isinstance(result, list) and "generated_text" in result[0]:
99
- answer = result[0]["generated_text"].strip()
100
- elif isinstance(result, dict) and "answer" in result:
101
- answer = result["answer"].strip()
102
- else:
103
- answer = "N/A"
104
-
105
- print(f"✅ Final Answer: {answer}")
106
- qa_pairs.append(f"Q: {question}\nA: {answer}")
107
-
108
- except Exception as e:
109
- print(f"❌ QA model failed: {e}")
110
- continue
111
-
112
- if not qa_pairs:
113
- print("⚠️ No Q&A pairs generated.")
114
- return "⚠️ No Q&A pairs generated."
115
-
116
- print("✅ Final Q&A generated successfully.")
117
- return "\n\n".join(qa_pairs)
118
-
119
- except Exception as e:
120
- print(f"🔥 Exception in generate_qa(): {e}")
121
- return f" Error: {str(e)}"
122
-
123
-
124
- # Ask question using token (semantic similarity)
125
- def ask_question(token, question):
126
- try:
127
- with sqlite3.connect("my_database.db") as conn:
128
- cursor = conn.cursor()
129
- cursor.execute("SELECT chunk_data FROM token_data WHERE token_id = ?", (token,))
130
- row = cursor.fetchone()
131
-
132
- if not row:
133
- return "❌ Token not found."
134
-
135
- chunks = json.loads(row[0])
136
- processor = pdf_query()
137
- model = processor.model
138
-
139
- clean_chunks = [re.sub(r'\s+', ' ', c.strip()) for c in chunks if c.strip()]
140
- if not clean_chunks:
141
- return "⚠️ No valid content found in PDF."
142
-
143
- chunk_embeddings = model.encode(clean_chunks)
144
- q_embedding = model.encode([question])
145
- scores = cosine_similarity(q_embedding, chunk_embeddings)[0]
146
-
147
- top_index = int(np.argmax(scores))
148
- top_score = float(scores[top_index])
149
- best_text = clean_chunks[top_index]
150
-
151
- return f"Q: {question}\nA: {best_text}\nScore: {round(top_score, 3)}"
152
-
153
- except Exception as e:
154
- return f" Error: {str(e)}"
155
-
156
-
157
-
158
-
159
-
160
-
161
- # ✅ Gradio UI
162
- with gr.Blocks(theme="default") as demo:
163
- gr.Markdown(
164
- """
165
- <div style='text-align: center; padding: 1rem;'>
166
- <h1 style='color: #3b82f6;'>📄 AI-Powered PDF Q&A System</h1>
167
- <p style='font-size: 1.1rem;'>Upload your PDFs, generate smart questions, and get intelligent answers.</p>
168
- </div>
169
- """
170
- )
171
-
172
- with gr.Tab("📤 1. Upload PDF"):
173
- gr.Markdown("### 🗂 Upload a PDF File")
174
- file = gr.File(label="Upload one or more PDFs", file_types=[".pdf"], file_count="multiple")
175
-
176
- upload_out = gr.Textbox(label="Upload Result", interactive=False)
177
- file.change(fn=upload_pdf, inputs=file, outputs=upload_out)
178
-
179
- with gr.Tab("🧠 2. Generate Questions & Answers"):
180
- gr.Markdown("### 🤖 Generate Questions and Answers from Uploaded PDF")
181
- token_input = gr.Textbox(label="🔑 Enter Received Token", placeholder="e.g., 123e4567-e89b-12d3-a456...")
182
- output_box = gr.Textbox(label="📝 Generated Q&A", lines=15, interactive=False)
183
- gr.Button("🚀 Generate Q&A").click(fn=generate_qa, inputs=token_input, outputs=output_box)
184
-
185
- with gr.Tab("❓ 3. Ask a Question"):
186
- gr.Markdown("### 💬 Ask a question based on uploaded PDF")
187
- token_box = gr.Textbox(label="Token ID", placeholder="e.g., 123e4567-e89b-12d3-a456...")
188
- question_box = gr.Textbox(label="Type your question", placeholder="What is the main topic discussed?")
189
- answer_result = gr.Textbox(label="Answer Output", lines=6, interactive=False)
190
- gr.Button("🎯 Get Answer").click(fn=ask_question, inputs=[token_box, question_box], outputs=answer_result)
191
-
192
- if __name__ == "__main__":
193
- demo.launch(server_name="0.0.0.0", server_port=7860)
194
-
 
1
+ import gradio as gr
2
+ import uuid
3
+ import sqlite3
4
+ import json
5
+ import re
6
+ import PyPDF2
7
+ import numpy as np
8
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
9
+ from sklearn.metrics.pairwise import cosine_similarity
10
+
11
+ # Local imports
12
+ from database1 import create_db
13
+ from first1 import pdf_query
14
+
15
+ from ans_generator1 import AnswerGenerator
16
+
17
+ import sqlite3, json
18
+ from q_generator1 import QGenerator
19
+ from transformers import pipeline
20
+ # Initialize models
21
# Question and answer generator instances created once at import time.
qgen = QGenerator()
ansgen = AnswerGenerator()

# One checkpoint name feeds both the tokenizer and the seq2seq model so the
# two cannot drift apart.
_FLAN_T5_CHECKPOINT = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(_FLAN_T5_CHECKPOINT, use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained(_FLAN_T5_CHECKPOINT)

# Text-to-text pipeline wrapping the model/tokenizer pair loaded above.
qa_model = pipeline(
    "text2text-generation",
    tokenizer=tokenizer,
    model=model,
)
26
+
27
+ # Upload and process PDF
28
def upload_pdf(files):
    """Extract text from each uploaded PDF, store it, and report the issued tokens.

    For every file a fresh UUID4 token is generated, the text of all pages is
    concatenated, split into fixed-size character chunks, and passed to
    ``create_db`` together with the file name and the full text.

    Args:
        files: Iterable of uploaded file objects. ``None`` or an empty
            collection is accepted (presumably what the upload widget sends
            when it is cleared -- confirm against the Gradio version in use).

    Returns:
        A status string with one entry per file, separated by blank lines.
        Failures are reported per file instead of aborting the batch, so the
        tokens of files that were already stored are never lost.
    """
    if not files:
        return "⚠️ Please upload at least one PDF."

    chunk_size = 500  # characters per stored chunk
    messages = []

    for file in files:
        # Fall back to the object's string form when it carries no ``name``.
        filename = getattr(file, "name", str(file))
        try:
            token = str(uuid.uuid4())
            pdf_reader = PyPDF2.PdfReader(file)
            # extract_text() may return nothing for a page; treat that as "".
            text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
            chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
            create_db(token, chunks, filename, text)
        except Exception as e:
            # Report this file and keep going so earlier successes stay visible.
            messages.append(f"❌ Error: {filename}: {e}")
            continue

        messages.append(f"✅ Uploaded and stored: {filename}\n🔑 Token: {token}")

    return "\n\n".join(messages)
47
+
48
+ # ✅ Generate Questions & Answers
49
def generate_qa(token):
    """Generate question/answer pairs for the PDF stored under *token*.

    The chunk list is read from the ``token_data`` table of
    ``my_database.db``. For every chunk, questions come from
    ``qgen.generate`` and up to two of them are answered by ``qa_model``
    using the chunk as context.

    Args:
        token: Token string identifying the stored PDF. Surrounding
            whitespace is ignored.

    Returns:
        The generated pairs formatted as ``Q: ...`` / ``A: ...`` and separated
        by blank lines, or a status message when the token is missing, nothing
        is stored for it, no pairs could be produced, or an error occurred.
        Errors are reported in the returned string rather than raised.
    """
    # Function-scope import keeps this block self-contained.
    from contextlib import closing

    try:
        # Tokens are usually pasted by hand; tolerate stray whitespace.
        token = (token or "").strip()
        if not token:
            return "⚠️ Please provide a token."

        print("📥 Received Token:", token)

        # sqlite3's own context manager only commits/rolls back; closing()
        # is what actually releases the connection.
        with closing(sqlite3.connect("my_database.db")) as conn:
            row = conn.execute(
                "SELECT chunk_data FROM token_data WHERE token_id = ?", (token,)
            ).fetchone()

        if not row:
            return "❌ No data found for this token."

        chunks = json.loads(row[0])
        qa_pairs = []

        for i, chunk in enumerate(chunks):
            print(f"\n🔹 Processing chunk {i+1}/{len(chunks)}")
            questions = qgen.generate(chunk)
            print(f"🧠 Questions generated: {questions}")

            if not questions:
                continue

            for question in questions[:2]:  # at most two questions per chunk
                prompt = f"Context: {chunk}\n\nQuestion: {question}\n\nAnswer:"
                try:
                    result = qa_model(prompt, max_length=256, do_sample=False)
                    if isinstance(result, list) and result and "generated_text" in result[0]:
                        answer = result[0]["generated_text"].strip()
                    else:
                        answer = "N/A"

                    qa_pairs.append(f"Q: {question}\nA: {answer}")

                except Exception as e:
                    # Best effort: skip this question, but leave a trace.
                    print(f"❌ QA model failed: {e}")
                    continue

        return "\n\n".join(qa_pairs) if qa_pairs else "⚠️ No Q&A pairs generated."

    except Exception as e:
        return f"❌ Error: {str(e)}"
93
+
94
+ # ✅ Ask a question using token
95
def ask_question(token, question):
    """Return the stored chunk most similar to *question* for the PDF under *token*.

    The chunk list is read from the ``token_data`` table of
    ``my_database.db``. Blank chunks are dropped and whitespace runs are
    collapsed. The remaining chunks and the question are encoded with
    ``pdf_query().model``, and the chunk with the highest cosine similarity is
    returned.

    Args:
        token: Token string identifying the stored PDF. Surrounding
            whitespace is ignored.
        question: The user's question. Surrounding whitespace is ignored.

    Returns:
        A string of the form ``Q: ...`` / ``A: <best chunk>`` /
        ``Score: <similarity rounded to 3 places>``, or a status message when
        input is missing, the token is unknown, there is no usable content, or
        an error occurred. Errors are reported in the returned string rather
        than raised.
    """
    # Function-scope import keeps this block self-contained.
    from contextlib import closing

    try:
        token = (token or "").strip()
        question = (question or "").strip()
        if not token:
            return "⚠️ Please provide a token."
        if not question:
            return "⚠️ Please enter a question."

        # sqlite3's own context manager only commits/rolls back; closing()
        # is what actually releases the connection.
        with closing(sqlite3.connect("my_database.db")) as conn:
            row = conn.execute(
                "SELECT chunk_data FROM token_data WHERE token_id = ?", (token,)
            ).fetchone()

        if not row:
            return "❌ Token not found."

        chunks = json.loads(row[0])

        # Normalise whitespace and drop chunks that are blank.
        clean_chunks = [re.sub(r'\s+', ' ', c.strip()) for c in chunks if c.strip()]
        if not clean_chunks:
            return "⚠️ No valid content found in PDF."

        # Local name deliberately differs from the module-level ``model``.
        # NOTE(review): pdf_query() is constructed on every call; if that
        # loads an embedding model it is expensive -- consider caching.
        encoder = pdf_query().model

        chunk_embeddings = encoder.encode(clean_chunks)
        q_embedding = encoder.encode([question])
        scores = cosine_similarity(q_embedding, chunk_embeddings)[0]

        top_index = int(np.argmax(scores))
        top_score = float(scores[top_index])
        best_text = clean_chunks[top_index]

        return f"Q: {question}\nA: {best_text}\nScore: {round(top_score, 3)}"

    except Exception as e:
        return f"❌ Error: {str(e)}"
125
+
126
+ # ✅ Gradio UI
127
# Gradio UI: three tabs (upload, Q&A generation, free-form question), each
# wired to one of the handler functions defined above.
with gr.Blocks(theme="default") as demo:
    # Centered page header rendered from inline HTML.
    gr.Markdown(
        """
        <div style='text-align: center; padding: 1rem;'>
        <h1 style='color: #3b82f6;'>📄 AI-Powered PDF Q&A System</h1>
        <p style='font-size: 1.1rem;'>Upload your PDFs, generate smart questions, and get intelligent answers.</p>
        </div>
        """
    )

    # Tab 1: changing the file selection triggers upload_pdf.
    with gr.Tab("📤 1. Upload PDF"):
        gr.Markdown("### 🗂 Upload a PDF File")
        file = gr.File(
            label="Upload one or more PDFs",
            file_types=[".pdf"],
            file_count="multiple",
        )
        upload_out = gr.Textbox(label="Upload Result", interactive=False)
        file.change(fn=upload_pdf, inputs=file, outputs=upload_out)

    # Tab 2: the button runs generate_qa on the entered token.
    with gr.Tab("🧠 2. Generate Questions & Answers"):
        gr.Markdown("### 🤖 Generate Questions and Answers from Uploaded PDF")
        token_input = gr.Textbox(
            label="🔑 Enter Received Token",
            placeholder="e.g., 123e4567-e89b-12d3-a456...",
        )
        output_box = gr.Textbox(label="📝 Generated Q&A", lines=15, interactive=False)
        generate_button = gr.Button("🚀 Generate Q&A")
        generate_button.click(fn=generate_qa, inputs=token_input, outputs=output_box)

    # Tab 3: the button runs ask_question on the token and question.
    with gr.Tab("❓ 3. Ask a Question"):
        gr.Markdown("### 💬 Ask a question based on uploaded PDF")
        token_box = gr.Textbox(
            label="Token ID",
            placeholder="e.g., 123e4567-e89b-12d3-a456...",
        )
        question_box = gr.Textbox(
            label="Type your question",
            placeholder="What is the main topic discussed?",
        )
        answer_result = gr.Textbox(label="Answer Output", lines=6, interactive=False)
        answer_button = gr.Button("🎯 Get Answer")
        answer_button.click(
            fn=ask_question,
            inputs=[token_box, question_box],
            outputs=answer_result,
        )

# Serve on all interfaces, port 7860, only when run as a script.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
158
+