sidbhasin committed on
Commit
18b6637
·
verified ·
1 Parent(s): 8079882

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -65
app.py CHANGED
@@ -8,7 +8,7 @@ import textwrap
8
 
9
  # Set page config
10
  st.set_page_config(
11
- page_title="PDF Question Answering System",
12
  page_icon="📚",
13
  layout="wide"
14
  )
@@ -16,32 +16,39 @@ st.set_page_config(
16
  # Custom CSS
17
  st.markdown("""
18
  <style>
19
- .stApp {
20
  max-width: 1200px;
21
  margin: 0 auto;
22
  }
23
- .answer-box {
24
- padding: 20px;
25
- background-color: #f8f9fa;
26
- border-radius: 8px;
27
- margin: 10px 0;
28
- border-left: 4px solid #1f77b4;
29
  }
30
- .metadata-box {
31
- font-size: 0.9em;
32
- color: #666;
33
- margin-top: 10px;
34
- padding-top: 10px;
35
- border-top: 1px solid #eee;
36
  }
37
- .context-box {
38
- padding: 15px;
39
- background-color: #fff;
40
- border: 1px solid #ddd;
41
- border-radius: 4px;
42
- margin-top: 10px;
43
- font-size: 0.9em;
44
- color: #000; /* Set text color to black */
 
 
 
 
 
 
 
 
 
 
 
45
  }
46
  </style>
47
  """, unsafe_allow_html=True)
@@ -63,13 +70,9 @@ def extract_text_with_metadata(pdf_file):
63
  for page_num, page in enumerate(pdf.pages, 1):
64
  text = page.extract_text()
65
  if text:
66
- # Split text into paragraphs
67
  paragraphs = text.split('\n\n')
68
-
69
- # Process each paragraph
70
  for para_num, paragraph in enumerate(paragraphs, 1):
71
  if paragraph.strip():
72
- # Split paragraph into lines
73
  lines = paragraph.split('\n')
74
  for line_num, line in enumerate(lines, 1):
75
  text_data.append({
@@ -79,22 +82,16 @@ def extract_text_with_metadata(pdf_file):
79
  'line': line_num,
80
  'full_paragraph': paragraph.strip()
81
  })
82
-
83
  return text_data
84
 
85
  def find_answer(question, text_data, qa_model):
86
  """Find answer in the text with context and metadata"""
87
- # Combine text data for QA model
88
  full_text = ' '.join([item['text'] for item in text_data])
89
-
90
- # Get answer from model
91
  result = qa_model(question=question, context=full_text)
92
 
93
- # Find the text segment containing the answer
94
  answer_text = result['answer']
95
  answer_score = result['score']
96
 
97
- # Find metadata for the answer
98
  metadata = None
99
  context = None
100
 
@@ -115,8 +112,24 @@ def find_answer(question, text_data, qa_model):
115
  'context': context
116
  }
117
 
 
 
 
 
 
 
 
 
 
 
118
  def main():
119
- st.title("📚 PDF Question Answering System")
 
 
 
 
 
 
120
 
121
  # Load model
122
  try:
@@ -128,65 +141,64 @@ def main():
128
  # File upload
129
  pdf_file = st.file_uploader("Upload PDF Document", type=['pdf'])
130
 
131
- if pdf_file:
132
- # Extract text with metadata
133
  with st.spinner("Processing PDF..."):
134
  try:
135
- text_data = extract_text_with_metadata(pdf_file)
136
- st.session_state.text_data = text_data
137
  st.success("PDF processed successfully!")
138
  except Exception as e:
139
  st.error(f"Error processing PDF: {str(e)}")
140
  return
141
-
142
- # Question input
143
- question = st.text_input("Ask a question about the document:")
 
 
 
 
 
 
 
144
 
145
  if question:
 
 
 
146
  with st.spinner("Finding answer..."):
147
  try:
148
  result = find_answer(question, st.session_state.text_data, qa_model)
149
 
150
- # Display answer with metadata
151
- st.markdown("### Answer")
152
- st.markdown(f"""
153
- <div class="answer-box">
154
- <div>{result['answer']}</div>
155
- <div class="metadata-box">
156
- <strong>Confidence:</strong> {result['confidence']:.2%}<br>
157
- <strong>Location:</strong> Page {result['metadata']['page']},
158
- Paragraph {result['metadata']['paragraph']},
159
- Line {result['metadata']['line']}
160
- </div>
161
- </div>
162
- """, unsafe_allow_html=True)
163
 
164
- # Display context
165
- if result['context']:
166
- st.markdown("### Context")
167
- st.markdown(f"""
168
- <div class="context-box">
169
- {result['context']}
170
- </div>
171
- """, unsafe_allow_html=True)
172
 
173
  except Exception as e:
174
  st.error(f"Error finding answer: {str(e)}")
175
 
176
  # Instructions
177
- else:
178
  st.markdown("""
179
  ### Instructions:
180
  1. Upload a PDF document using the file uploader above
181
  2. Wait for the document to be processed
182
- 3. Type your question in the text input
183
- 4. Get detailed answers with page numbers and context
184
 
185
  ### Features:
 
186
  - Extracts answers from PDF documents
187
  - Provides page numbers and line information
188
  - Shows confidence scores
189
- - Displays relevant context
190
  - Handles multiple page documents
191
  """)
192
 
 
8
 
9
  # Set page config
10
  st.set_page_config(
11
+ page_title="PDF AI Chat",
12
  page_icon="📚",
13
  layout="wide"
14
  )
 
16
  # Custom CSS
17
  st.markdown("""
18
  <style>
19
+ .stApp {
20
  max-width: 1200px;
21
  margin: 0 auto;
22
  }
23
+ .chat-message {
24
+ padding: 1.5rem;
25
+ border-radius: 0.5rem;
26
+ margin-bottom: 1rem;
27
+ display: flex;
28
+ flex-direction: column;
29
  }
30
+ .chat-message.user {
31
+ background-color: #2b313e;
 
 
 
 
32
  }
33
+ .chat-message.bot {
34
+ background-color: #475063;
35
+ }
36
+ .chat-message .message {
37
+ color: #ffffff;
38
+ font-size: 1.1rem;
39
+ }
40
+ .chat-message .metadata {
41
+ color: #a8a8a8;
42
+ font-size: 0.85rem;
43
+ margin-top: 0.5rem;
44
+ }
45
+ .chat-input {
46
+ position: fixed;
47
+ bottom: 0;
48
+ left: 0;
49
+ right: 0;
50
+ padding: 1rem;
51
+ background-color: #262730;
52
  }
53
  </style>
54
  """, unsafe_allow_html=True)
 
70
  for page_num, page in enumerate(pdf.pages, 1):
71
  text = page.extract_text()
72
  if text:
 
73
  paragraphs = text.split('\n\n')
 
 
74
  for para_num, paragraph in enumerate(paragraphs, 1):
75
  if paragraph.strip():
 
76
  lines = paragraph.split('\n')
77
  for line_num, line in enumerate(lines, 1):
78
  text_data.append({
 
82
  'line': line_num,
83
  'full_paragraph': paragraph.strip()
84
  })
 
85
  return text_data
86
 
87
  def find_answer(question, text_data, qa_model):
88
  """Find answer in the text with context and metadata"""
 
89
  full_text = ' '.join([item['text'] for item in text_data])
 
 
90
  result = qa_model(question=question, context=full_text)
91
 
 
92
  answer_text = result['answer']
93
  answer_score = result['score']
94
 
 
95
  metadata = None
96
  context = None
97
 
 
112
  'context': context
113
  }
114
 
115
+ def display_chat_message(message, is_user=False):
116
+ """Display a chat message"""
117
+ message_type = "user" if is_user else "bot"
118
+ st.markdown(f"""
119
+ <div class="chat-message {message_type}">
120
+ <div class="message">{message['text']}</div>
121
+ {f"<div class='metadata'>{message['metadata']}</div>" if 'metadata' in message else ""}
122
+ </div>
123
+ """, unsafe_allow_html=True)
124
+
125
  def main():
126
+ st.title("📚 PDF AI Chat")
127
+
128
+ # Initialize session state
129
+ if 'chat_history' not in st.session_state:
130
+ st.session_state.chat_history = []
131
+ if 'text_data' not in st.session_state:
132
+ st.session_state.text_data = None
133
 
134
  # Load model
135
  try:
 
141
  # File upload
142
  pdf_file = st.file_uploader("Upload PDF Document", type=['pdf'])
143
 
144
+ if pdf_file and not st.session_state.text_data:
 
145
  with st.spinner("Processing PDF..."):
146
  try:
147
+ st.session_state.text_data = extract_text_with_metadata(pdf_file)
 
148
  st.success("PDF processed successfully!")
149
  except Exception as e:
150
  st.error(f"Error processing PDF: {str(e)}")
151
  return
152
+
153
+ # Display chat history
154
+ for message in st.session_state.chat_history:
155
+ display_chat_message(message, is_user=message['is_user'])
156
+
157
+ # Chat input
158
+ with st.container():
159
+ st.markdown('<div class="chat-input">', unsafe_allow_html=True)
160
+ question = st.text_input("Ask a question about the document:", key="chat_input")
161
+ st.markdown('</div>', unsafe_allow_html=True)
162
 
163
  if question:
164
+ # Add user question to chat history
165
+ st.session_state.chat_history.append({'text': question, 'is_user': True})
166
+
167
  with st.spinner("Finding answer..."):
168
  try:
169
  result = find_answer(question, st.session_state.text_data, qa_model)
170
 
171
+ # Create bot response
172
+ bot_response = {
173
+ 'text': result['answer'],
174
+ 'metadata': f"Confidence: {result['confidence']:.2%} | Page: {result['metadata']['page']}, "
175
+ f"Paragraph: {result['metadata']['paragraph']}, Line: {result['metadata']['line']}",
176
+ 'is_user': False
177
+ }
178
+
179
+ # Add bot response to chat history
180
+ st.session_state.chat_history.append(bot_response)
 
 
 
181
 
182
+ # Force a rerun to update the chat display
183
+ st.experimental_rerun()
 
 
 
 
 
 
184
 
185
  except Exception as e:
186
  st.error(f"Error finding answer: {str(e)}")
187
 
188
  # Instructions
189
+ if not pdf_file:
190
  st.markdown("""
191
  ### Instructions:
192
  1. Upload a PDF document using the file uploader above
193
  2. Wait for the document to be processed
194
+ 3. Start asking questions about the document
195
+ 4. Get detailed answers with page numbers and confidence scores
196
 
197
  ### Features:
198
+ - Chat-like interface for asking multiple questions
199
  - Extracts answers from PDF documents
200
  - Provides page numbers and line information
201
  - Shows confidence scores
 
202
  - Handles multiple page documents
203
  """)
204