sidbhasin committed on
Commit
d96039d
·
verified ·
1 Parent(s): 18b6637

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -89
app.py CHANGED
@@ -13,7 +13,7 @@ st.set_page_config(
13
  layout="wide"
14
  )
15
 
16
- # Custom CSS
17
  st.markdown("""
18
  <style>
19
  .stApp {
@@ -26,20 +26,23 @@ st.markdown("""
26
  margin-bottom: 1rem;
27
  display: flex;
28
  flex-direction: column;
 
29
  }
30
  .chat-message.user {
31
  background-color: #2b313e;
32
  }
33
- .chat-message.bot {
34
  background-color: #475063;
35
  }
36
- .chat-message .message {
37
- color: #ffffff;
38
- font-size: 1.1rem;
 
 
39
  }
40
  .chat-message .metadata {
41
- color: #a8a8a8;
42
  font-size: 0.85rem;
 
43
  margin-top: 0.5rem;
44
  }
45
  .chat-input {
@@ -50,12 +53,25 @@ st.markdown("""
50
  padding: 1rem;
51
  background-color: #262730;
52
  }
 
 
 
 
 
 
 
 
53
  </style>
54
  """, unsafe_allow_html=True)
55
 
 
 
 
 
 
 
56
  @st.cache_resource
57
  def load_model():
58
- """Load the QA model"""
59
  return pipeline(
60
  "question-answering",
61
  model="deepset/roberta-base-squad2",
@@ -63,7 +79,6 @@ def load_model():
63
  )
64
 
65
  def extract_text_with_metadata(pdf_file):
66
- """Extract text from PDF with page numbers and paragraph information"""
67
  text_data = []
68
 
69
  with pdfplumber.open(pdf_file) as pdf:
@@ -85,53 +100,32 @@ def extract_text_with_metadata(pdf_file):
85
  return text_data
86
 
87
  def find_answer(question, text_data, qa_model):
88
- """Find answer in the text with context and metadata"""
89
  full_text = ' '.join([item['text'] for item in text_data])
90
- result = qa_model(question=question, context=full_text)
91
-
92
- answer_text = result['answer']
93
- answer_score = result['score']
94
-
95
- metadata = None
96
- context = None
97
 
98
- for item in text_data:
99
- if answer_text in item['text']:
100
- metadata = {
101
- 'page': item['page'],
102
- 'paragraph': item['paragraph'],
103
- 'line': item['line']
104
- }
105
- context = item['full_paragraph']
106
- break
107
-
108
- return {
109
- 'answer': answer_text,
110
- 'confidence': answer_score,
111
- 'metadata': metadata,
112
- 'context': context
113
- }
114
-
115
- def display_chat_message(message, is_user=False):
116
- """Display a chat message"""
117
- message_type = "user" if is_user else "bot"
118
- st.markdown(f"""
119
- <div class="chat-message {message_type}">
120
- <div class="message">{message['text']}</div>
121
- {f"<div class='metadata'>{message['metadata']}</div>" if 'metadata' in message else ""}
122
- </div>
123
- """, unsafe_allow_html=True)
124
 
125
  def main():
126
  st.title("📚 PDF AI Chat")
127
 
128
- # Initialize session state
129
- if 'chat_history' not in st.session_state:
130
- st.session_state.chat_history = []
131
- if 'text_data' not in st.session_state:
132
- st.session_state.text_data = None
133
-
134
- # Load model
135
  try:
136
  qa_model = load_model()
137
  except Exception as e:
@@ -149,57 +143,59 @@ def main():
149
  except Exception as e:
150
  st.error(f"Error processing PDF: {str(e)}")
151
  return
152
-
153
- # Display chat history
154
- for message in st.session_state.chat_history:
155
- display_chat_message(message, is_user=message['is_user'])
156
-
 
 
 
 
 
 
 
 
 
 
157
  # Chat input
158
- with st.container():
159
- st.markdown('<div class="chat-input">', unsafe_allow_html=True)
160
- question = st.text_input("Ask a question about the document:", key="chat_input")
161
- st.markdown('</div>', unsafe_allow_html=True)
162
-
163
- if question:
164
- # Add user question to chat history
165
- st.session_state.chat_history.append({'text': question, 'is_user': True})
166
 
 
167
  with st.spinner("Finding answer..."):
168
- try:
169
- result = find_answer(question, st.session_state.text_data, qa_model)
170
-
171
- # Create bot response
172
- bot_response = {
173
- 'text': result['answer'],
174
- 'metadata': f"Confidence: {result['confidence']:.2%} | Page: {result['metadata']['page']}, "
175
- f"Paragraph: {result['metadata']['paragraph']}, Line: {result['metadata']['line']}",
176
- 'is_user': False
177
- }
 
 
 
 
178
 
179
- # Add bot response to chat history
180
- st.session_state.chat_history.append(bot_response)
181
-
182
- # Force a rerun to update the chat display
183
- st.experimental_rerun()
184
-
185
- except Exception as e:
186
- st.error(f"Error finding answer: {str(e)}")
187
-
188
- # Instructions
189
- if not pdf_file:
190
  st.markdown("""
191
  ### Instructions:
192
  1. Upload a PDF document using the file uploader above
193
  2. Wait for the document to be processed
194
  3. Start asking questions about the document
195
- 4. Get detailed answers with page numbers and confidence scores
196
 
197
  ### Features:
198
- - Chat-like interface for asking multiple questions
199
- - Extracts answers from PDF documents
200
- - Provides page numbers and line information
201
- - Shows confidence scores
202
- - Handles multiple page documents
203
  """)
204
 
205
  if __name__ == "__main__":
 
13
  layout="wide"
14
  )
15
 
16
+ # Custom CSS with improved styling
17
  st.markdown("""
18
  <style>
19
  .stApp {
 
26
  margin-bottom: 1rem;
27
  display: flex;
28
  flex-direction: column;
29
+ color: #ffffff;
30
  }
31
  .chat-message.user {
32
  background-color: #2b313e;
33
  }
34
+ .chat-message.assistant {
35
  background-color: #475063;
36
  }
37
+ .chat-message .content {
38
+ display: flex;
39
+ margin-bottom: 0.5rem;
40
+ padding: 1rem;
41
+ border-radius: 0.5rem;
42
  }
43
  .chat-message .metadata {
 
44
  font-size: 0.85rem;
45
+ color: #a8a8a8;
46
  margin-top: 0.5rem;
47
  }
48
  .chat-input {
 
53
  padding: 1rem;
54
  background-color: #262730;
55
  }
56
+ .source-info {
57
+ font-size: 0.8rem;
58
+ color: #666;
59
+ margin-top: 0.5rem;
60
+ padding: 0.5rem;
61
+ background-color: #f0f2f6;
62
+ border-radius: 0.3rem;
63
+ }
64
  </style>
65
  """, unsafe_allow_html=True)
66
 
67
+ # Initialize session state
68
+ if 'messages' not in st.session_state:
69
+ st.session_state.messages = []
70
+ if 'text_data' not in st.session_state:
71
+ st.session_state.text_data = None
72
+
73
  @st.cache_resource
74
  def load_model():
 
75
  return pipeline(
76
  "question-answering",
77
  model="deepset/roberta-base-squad2",
 
79
  )
80
 
81
  def extract_text_with_metadata(pdf_file):
 
82
  text_data = []
83
 
84
  with pdfplumber.open(pdf_file) as pdf:
 
100
  return text_data
101
 
102
  def find_answer(question, text_data, qa_model):
 
103
  full_text = ' '.join([item['text'] for item in text_data])
 
 
 
 
 
 
 
104
 
105
+ try:
106
+ result = qa_model(question=question, context=full_text)
107
+
108
+ answer_text = result['answer']
109
+ answer_score = result['score']
110
+
111
+ # Find the source paragraph
112
+ for item in text_data:
113
+ if answer_text in item['text']:
114
+ return {
115
+ 'answer': answer_text,
116
+ 'confidence': answer_score,
117
+ 'page': item['page'],
118
+ 'paragraph': item['paragraph'],
119
+ 'line': item['line'],
120
+ 'context': item['full_paragraph']
121
+ }
122
+ except Exception as e:
123
+ st.error(f"Error processing question: {str(e)}")
124
+ return None
 
 
 
 
 
 
125
 
126
  def main():
127
  st.title("📚 PDF AI Chat")
128
 
 
 
 
 
 
 
 
129
  try:
130
  qa_model = load_model()
131
  except Exception as e:
 
143
  except Exception as e:
144
  st.error(f"Error processing PDF: {str(e)}")
145
  return
146
+
147
+ # Display chat messages
148
+ for message in st.session_state.messages:
149
+ with st.chat_message(message["role"]):
150
+ st.write(message["content"])
151
+ if "metadata" in message:
152
+ st.markdown(f"""
153
+ <div class="source-info">
154
+ Source: Page {message['metadata']['page']},
155
+ Paragraph {message['metadata']['paragraph']},
156
+ Line {message['metadata']['line']}
157
+ <br>Confidence: {message['metadata']['confidence']:.2%}
158
+ </div>
159
+ """, unsafe_allow_html=True)
160
+
161
  # Chat input
162
+ if st.session_state.text_data:
163
+ if question := st.chat_input("Ask a question about the document"):
164
+ # Add user message
165
+ st.session_state.messages.append({"role": "user", "content": question})
 
 
 
 
166
 
167
+ # Generate answer
168
  with st.spinner("Finding answer..."):
169
+ result = find_answer(question, st.session_state.text_data, qa_model)
170
+
171
+ if result:
172
+ # Add assistant message
173
+ st.session_state.messages.append({
174
+ "role": "assistant",
175
+ "content": result['answer'],
176
+ "metadata": {
177
+ "page": result['page'],
178
+ "paragraph": result['paragraph'],
179
+ "line": result['line'],
180
+ "confidence": result['confidence']
181
+ }
182
+ })
183
 
184
+ # Rerun to update chat display
185
+ st.rerun()
186
+ else:
 
 
 
 
 
 
 
 
187
  st.markdown("""
188
  ### Instructions:
189
  1. Upload a PDF document using the file uploader above
190
  2. Wait for the document to be processed
191
  3. Start asking questions about the document
192
+ 4. Get detailed answers with source information
193
 
194
  ### Features:
195
+ - Chat-like interface
196
+ - Source tracking
197
+ - Confidence scores
198
+ - Context preservation
 
199
  """)
200
 
201
  if __name__ == "__main__":