deepkansara-123 committed on
Commit
9578afc
·
verified ·
1 Parent(s): 38118f1

Upload 7 files

Browse files
Files changed (1) hide show
  1. app.py +161 -152
app.py CHANGED
@@ -1,152 +1,161 @@
1
- import gradio as gr
2
- import uuid
3
- import sqlite3
4
- import json
5
- import re
6
- import PyPDF2
7
- import numpy as np
8
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
9
- from sklearn.metrics.pairwise import cosine_similarity
10
-
11
- # Local imports
12
- from database1 import create_db
13
- from first1 import pdf_query
14
- from q_generator1 import QGenerator
15
- from ans_generator1 import AnswerGenerator
16
-
17
- # Initialize models
18
- qgen = QGenerator()
19
- ansgen = AnswerGenerator()
20
-
21
- # Load FLAN-T5 model
22
- tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base", use_fast=False)
23
- model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
24
- qa_model = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
25
-
26
- # βœ… Upload and process PDF
27
- def upload_pdf(file):
28
- try:
29
- filename = file.name
30
- token = str(uuid.uuid4())
31
-
32
- pdf_reader = PyPDF2.PdfReader(file)
33
- text = "".join([page.extract_text() or "" for page in pdf_reader.pages])
34
- chunks = [text[i:i+500] for i in range(0, len(text), 500)]
35
-
36
- create_db(token, chunks, filename, text)
37
- return f"βœ… Uploaded and stored: {filename} (Token: {token})"
38
- except Exception as e:
39
- return f"❌ Error: {str(e)}"
40
-
41
- # βœ… Generate Q&A using filename
42
- def generate_qa(filename):
43
- try:
44
- with sqlite3.connect("my_database1.db") as conn:
45
- cursor = conn.cursor()
46
- cursor.execute("SELECT chunk_data FROM token_data WHERE filename = ?", (filename,))
47
- row = cursor.fetchone()
48
-
49
- if not row:
50
- return "❌ No data found for this filename."
51
-
52
- chunks = json.loads(row[0])
53
- qa_pairs = []
54
-
55
- for chunk in chunks:
56
- questions = qgen.generate(chunk)
57
- if not questions:
58
- continue
59
- question = questions[0]
60
- prompt = f"Context: {chunk}\n\nQuestion: {question}\n\nAnswer:"
61
- result = qa_model(prompt, max_length=256, do_sample=False)
62
- answer = result[0]["generated_text"].strip()
63
- qa_pairs.append(f"Q: {question}\nA: {answer}")
64
- return "\n\n".join(qa_pairs)
65
- except Exception as e:
66
- return f"❌ Error: {str(e)}"
67
-
68
- # βœ… Ask question using token (semantic similarity)
69
- def ask_question(token, question):
70
- try:
71
- with sqlite3.connect("my_database.db") as conn:
72
- cursor = conn.cursor()
73
- cursor.execute("SELECT chunk_data FROM token_data WHERE token_id = ?", (token,))
74
- row = cursor.fetchone()
75
-
76
- if not row:
77
- return "❌ Token not found."
78
-
79
- chunks = json.loads(row[0])
80
- processor = pdf_query()
81
- model = processor.model
82
- chunk_embeddings = model.encode(chunks)
83
- q_embedding = model.encode([question])
84
- scores = cosine_similarity(q_embedding, chunk_embeddings)[0]
85
- top_index = int(np.argmax(scores))
86
- top_score = float(scores[top_index])
87
- best_text = re.sub(r'\s+', ' ', chunks[top_index].strip())
88
-
89
- if top_score >= 0.5:
90
- return f"Q: {question}\nA: {best_text}\nScore: {round(top_score, 3)}"
91
- else:
92
- return "⚠️ No relevant answer found (score too low)."
93
- except Exception as e:
94
- return f"❌ Error: {str(e)}"
95
-
96
- # βœ… View uploaded PDFs (Gradio Tab)
97
- def list_uploaded_pdfs():
98
- try:
99
- with sqlite3.connect("my_database.db") as conn:
100
- cursor = conn.cursor()
101
- cursor.execute("SELECT filename, token_id, content FROM token_data")
102
- rows = cursor.fetchall()
103
-
104
- if not rows:
105
- return "ℹ️ No PDFs uploaded yet."
106
-
107
- result = ""
108
- for filename, token, content in rows:
109
- preview = content[:200].replace("\n", " ") + "..." if len(content) > 200 else content
110
- result += f"πŸ“„ **Filename:** {filename}\nπŸ”‘ Token: `{token}`\nπŸ“ Preview: {preview}\n\n---\n"
111
- return result
112
- except Exception as e:
113
- return f"❌ Error: {str(e)}"
114
-
115
- # βœ… Gradio UI
116
- with gr.Blocks(theme="default") as demo:
117
- gr.Markdown(
118
- """
119
- <div style='text-align: center; padding: 1rem;'>
120
- <h1 style='color: #3b82f6;'>πŸ“„ AI-Powered PDF Q&A System</h1>
121
- <p style='font-size: 1.1rem;'>Upload your PDFs, generate smart questions, and get intelligent answers.</p>
122
- </div>
123
- """
124
- )
125
-
126
- with gr.Tab("πŸ“€ 1. Upload PDF"):
127
- gr.Markdown("### πŸ—‚ Upload a PDF File")
128
- file = gr.File(label="Choose your PDF file", file_types=[".pdf"])
129
- upload_out = gr.Textbox(label="Upload Result", interactive=False)
130
- file.change(fn=upload_pdf, inputs=file, outputs=upload_out)
131
-
132
- with gr.Tab("🧠 2. Generate Questions & Answers"):
133
- gr.Markdown("### πŸ€– Generate Q&A from your PDF")
134
- fname = gr.Textbox(label="Enter uploaded filename", placeholder="example.pdf")
135
- qa_result = gr.Textbox(label="Generated Q&A", lines=12, interactive=False)
136
- gr.Button("πŸš€ Generate Q&A").click(fn=generate_qa, inputs=fname, outputs=qa_result)
137
-
138
- with gr.Tab("❓ 3. Ask a Question"):
139
- gr.Markdown("### πŸ’¬ Ask a question based on uploaded PDF")
140
- token_box = gr.Textbox(label="Token ID", placeholder="e.g., 123e4567-e89b-12d3-a456...")
141
- question_box = gr.Textbox(label="Type your question", placeholder="What is the main topic discussed?")
142
- answer_result = gr.Textbox(label="Answer Output", lines=6, interactive=False)
143
- gr.Button("🎯 Get Answer").click(fn=ask_question, inputs=[token_box, question_box], outputs=answer_result)
144
-
145
- with gr.Tab("πŸ“š 4. View Uploaded PDFs"):
146
- gr.Markdown("### πŸ“‹ Uploaded PDF List (with Preview Snippets)")
147
- list_btn = gr.Button("πŸ”„ Refresh List")
148
- pdf_list_output = gr.Markdown()
149
- list_btn.click(fn=list_uploaded_pdfs, outputs=pdf_list_output)
150
-
151
- if __name__ == "__main__":
152
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import uuid
3
+ import sqlite3
4
+ import json
5
+ import re
6
+ import PyPDF2
7
+ import numpy as np
8
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
9
+ from sklearn.metrics.pairwise import cosine_similarity
10
+
11
+ # Local imports
12
+ from database1 import create_db
13
+ from first1 import pdf_query
14
+ from q_generator1 import QGenerator
15
+ from ans_generator1 import AnswerGenerator
16
+
# Question/answer helper objects built from the project-local generator classes.
qgen = QGenerator()
ansgen = AnswerGenerator()

# The tokenizer and the seq2seq model are loaded from the same FLAN-T5
# checkpoint, so the identifier is spelled once.
_FLAN_T5_CHECKPOINT = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(_FLAN_T5_CHECKPOINT, use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained(_FLAN_T5_CHECKPOINT)

# Text-to-text pipeline that generate_qa() calls to answer generated questions.
qa_model = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
25
+
# ✅ Upload and process PDF (supports one or more PDF files)
def upload_pdf(files):
    """Extract the text of each uploaded PDF and store it under a fresh token.

    Each file is handled independently: a failure on one file is reported in
    the result without discarding the outcome of the files processed before
    or after it.

    Args:
        files: The uploaded file object(s) handed over by the file component.
            ``None`` or an empty list (nothing selected) is accepted, and so
            is a single upload that is not wrapped in a list.

    Returns:
        One status line per file, joined with newlines, or a short notice
        when no file was supplied.
    """
    if not files:
        return "⚠️ No files provided."

    # Keep accepting a single upload so single-file callers still work.
    if not isinstance(files, (list, tuple)):
        files = [files]

    messages = []
    for file in files:
        # NOTE(review): `name` may be a temporary upload path rather than the
        # user's original file name; generate_qa() looks rows up by this value.
        filename = getattr(file, "name", str(file))
        try:
            token = str(uuid.uuid4())
            pdf_reader = PyPDF2.PdfReader(file)
            # extract_text() can yield None, hence the `or ""`.
            text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
            # Fixed-size 500-character chunks.
            chunks = [text[i:i + 500] for i in range(0, len(text), 500)]
            create_db(token, chunks, filename, text)
        except Exception as e:
            # UI boundary: report the failure for this file and keep going.
            messages.append(f"❌ Error: {filename}: {e}")
        else:
            messages.append(f"✅ Uploaded and stored: {filename} (Token: {token})")

    return "\n".join(messages)
47
+
48
+
# ✅ Generate Q&A using filename
def generate_qa(filename, db_path="my_database1.db"):
    """Generate one question/answer pair per stored chunk of an uploaded PDF.

    The chunks stored for ``filename`` are read from the ``token_data`` table.
    For every chunk the first question produced by ``qgen`` is answered by the
    ``qa_model`` pipeline using that chunk as context.

    Args:
        filename: File name exactly as it was stored at upload time.
            Surrounding whitespace is ignored.
        db_path: SQLite database file to read from.
            NOTE(review): the default differs from the "my_database.db" used
            by ask_question() and list_uploaded_pdfs() -- confirm which file
            create_db() actually writes to.

    Returns:
        The "Q: ... / A: ..." pairs separated by blank lines, or a status
        message when nothing is stored, nothing could be generated, or an
        error occurred.
    """
    from contextlib import closing

    filename = (filename or "").strip()
    if not filename:
        return "❌ No data found for this filename."

    try:
        # A sqlite3 connection used directly as a context manager only scopes
        # a transaction; closing() makes sure the connection is released.
        with closing(sqlite3.connect(db_path)) as conn:
            row = conn.execute(
                "SELECT chunk_data FROM token_data WHERE filename = ?", (filename,)
            ).fetchone()

        if not row:
            return "❌ No data found for this filename."

        chunks = json.loads(row[0])
        qa_pairs = []

        for chunk in chunks:
            questions = qgen.generate(chunk)
            if not questions:
                continue
            # Only the first generated question of each chunk is used.
            question = questions[0]
            prompt = f"Context: {chunk}\n\nQuestion: {question}\n\nAnswer:"
            result = qa_model(prompt, max_length=256, do_sample=False)
            answer = result[0]["generated_text"].strip()
            qa_pairs.append(f"Q: {question}\nA: {answer}")

        if not qa_pairs:
            return "⚠️ No questions could be generated for this file."
        return "\n\n".join(qa_pairs)
    except Exception as e:
        # UI boundary: surface the error text instead of raising.
        return f"❌ Error: {str(e)}"
75
+
# ✅ Ask question using token (semantic similarity)
def ask_question(token, question, db_path="my_database.db", min_score=0.5):
    """Return the stored chunk most similar to ``question`` for one upload.

    The chunks stored under ``token`` and the question are embedded with the
    model exposed by ``pdf_query().model``. The chunk with the highest cosine
    similarity is returned when its score reaches ``min_score``.

    Args:
        token: Token id issued at upload time. Surrounding whitespace is
            ignored.
        question: The user's question. Surrounding whitespace is ignored.
        db_path: SQLite database file to read from.
            NOTE(review): generate_qa() reads "my_database1.db" -- confirm
            which file create_db() actually writes to.
        min_score: Minimum cosine similarity for a chunk to count as an
            answer.

    Returns:
        A "Q / A / Score" text block, or a status message when the token is
        unknown, no question was given, nothing is stored, the best score is
        too low, or an error occurred.
    """
    from contextlib import closing

    token = (token or "").strip()
    if not token:
        return "❌ Token not found."

    question = (question or "").strip()
    if not question:
        return "⚠️ Please enter a question."

    try:
        # A sqlite3 connection used directly as a context manager only scopes
        # a transaction; closing() makes sure the connection is released.
        with closing(sqlite3.connect(db_path)) as conn:
            row = conn.execute(
                "SELECT chunk_data FROM token_data WHERE token_id = ?", (token,)
            ).fetchone()

        if not row:
            return "❌ Token not found."

        chunks = json.loads(row[0])
        if not chunks:
            # Nothing to embed; avoid calling cosine_similarity on an empty set.
            return "⚠️ No text is stored for this token."

        # Named `encoder` so the module-level FLAN-T5 `model` is not shadowed.
        # NOTE(review): pdf_query() is constructed on every call; if that is
        # expensive, consider building it once at module level.
        encoder = pdf_query().model
        chunk_embeddings = encoder.encode(chunks)
        q_embedding = encoder.encode([question])
        scores = cosine_similarity(q_embedding, chunk_embeddings)[0]
        top_index = int(np.argmax(scores))
        top_score = float(scores[top_index])

        if top_score < min_score:
            return "⚠️ No relevant answer found (score too low)."

        # Collapse runs of whitespace so the answer reads as one paragraph.
        best_text = re.sub(r"\s+", " ", chunks[top_index].strip())
        return f"Q: {question}\nA: {best_text}\nScore: {round(top_score, 3)}"
    except Exception as e:
        # UI boundary: surface the error text instead of raising.
        return f"❌ Error: {str(e)}"
103
+
# ✅ View uploaded PDFs (Gradio Tab)
def list_uploaded_pdfs(db_path="my_database.db"):
    """List every stored PDF with its token and a short text preview.

    Args:
        db_path: SQLite database file to read from.
            NOTE(review): generate_qa() reads "my_database1.db" -- confirm
            which file create_db() actually writes to.

    Returns:
        A Markdown string with one entry per row of ``token_data``, a notice
        when the table is empty, or an error message.
    """
    from contextlib import closing

    try:
        # A sqlite3 connection used directly as a context manager only scopes
        # a transaction; closing() makes sure the connection is released.
        with closing(sqlite3.connect(db_path)) as conn:
            rows = conn.execute(
                "SELECT filename, token_id, content FROM token_data"
            ).fetchall()

        if not rows:
            return "ℹ️ No PDFs uploaded yet."

        entries = []
        for filename, token, content in rows:
            # Flatten newlines for every preview, short or long, and tolerate
            # a NULL content column.
            flat = (content or "").replace("\n", " ")
            preview = flat[:200] + "..." if len(flat) > 200 else flat
            entries.append(
                f"📄 **Filename:** {filename}\n🔑 Token: `{token}`\n📝 Preview: {preview}\n\n---\n"
            )
        return "".join(entries)
    except Exception as e:
        # UI boundary: surface the error text instead of raising.
        return f"❌ Error: {str(e)}"
122
+
# ✅ Gradio UI
# Four tabs: upload, Q&A generation, question answering, and a listing of the
# stored PDFs. Emoji in the labels are written as real Unicode characters.
with gr.Blocks(theme="default") as demo:
    gr.Markdown(
        """
        <div style='text-align: center; padding: 1rem;'>
            <h1 style='color: #3b82f6;'>📄 AI-Powered PDF Q&A System</h1>
            <p style='font-size: 1.1rem;'>Upload your PDFs, generate smart questions, and get intelligent answers.</p>
        </div>
        """
    )

    with gr.Tab("📤 1. Upload PDF"):
        gr.Markdown("### 🗂 Upload a PDF File")
        file = gr.File(label="Upload one or more PDFs", file_types=[".pdf"], file_count="multiple")

        upload_out = gr.Textbox(label="Upload Result", interactive=False)
        # upload_pdf is wired to the component's change event.
        file.change(fn=upload_pdf, inputs=file, outputs=upload_out)

    with gr.Tab("🧠 2. Generate Questions & Answers"):
        gr.Markdown("### 🤖 Generate Q&A from your PDF")
        fname = gr.Textbox(label="Enter uploaded filename", placeholder="example.pdf")
        qa_result = gr.Textbox(label="Generated Q&A", lines=12, interactive=False)
        generate_btn = gr.Button("🚀 Generate Q&A")
        generate_btn.click(fn=generate_qa, inputs=fname, outputs=qa_result)

    with gr.Tab("❓ 3. Ask a Question"):
        gr.Markdown("### 💬 Ask a question based on uploaded PDF")
        token_box = gr.Textbox(label="Token ID", placeholder="e.g., 123e4567-e89b-12d3-a456...")
        question_box = gr.Textbox(label="Type your question", placeholder="What is the main topic discussed?")
        answer_result = gr.Textbox(label="Answer Output", lines=6, interactive=False)
        answer_btn = gr.Button("🎯 Get Answer")
        answer_btn.click(fn=ask_question, inputs=[token_box, question_box], outputs=answer_result)

    with gr.Tab("📚 4. View Uploaded PDFs"):
        gr.Markdown("### 📋 Uploaded PDF List (with Preview Snippets)")
        list_btn = gr.Button("🔄 Refresh List")
        pdf_list_output = gr.Markdown()
        list_btn.click(fn=list_uploaded_pdfs, outputs=pdf_list_output)

if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)