Spaces:

sidbhasin
/

PDF_Answer_AI_By_Syncmerce

Sleeping

App Files Files Community

sidbhasin committed on Nov 12, 2024

Commit

d96039d

verified ·

1 Parent(s): 18b6637

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -89

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ st.set_page_config(
     layout="wide"
 )
-# Custom CSS
 st.markdown("""
     <style>
     .stApp {
@@ -26,20 +26,23 @@ st.markdown("""
         margin-bottom: 1rem;
         display: flex;
         flex-direction: column;
     }
     .chat-message.user {
         background-color: #2b313e;
     }
-    .chat-message.bot {
         background-color: #475063;
     }
-    .chat-message .message {
-        color: #ffffff;
-        font-size: 1.1rem;
     }
     .chat-message .metadata {
-        color: #a8a8a8;
         font-size: 0.85rem;
         margin-top: 0.5rem;
     }
     .chat-input {
@@ -50,12 +53,25 @@ st.markdown("""
         padding: 1rem;
         background-color: #262730;
     }
     </style>
 """, unsafe_allow_html=True)
 @st.cache_resource
 def load_model():
-    """Load the QA model"""
     return pipeline(
         "question-answering",
         model="deepset/roberta-base-squad2",
@@ -63,7 +79,6 @@ def load_model():
     )
 def extract_text_with_metadata(pdf_file):
-    """Extract text from PDF with page numbers and paragraph information"""
     text_data = []
     with pdfplumber.open(pdf_file) as pdf:
@@ -85,53 +100,32 @@ def extract_text_with_metadata(pdf_file):
     return text_data
 def find_answer(question, text_data, qa_model):
-    """Find answer in the text with context and metadata"""
     full_text = ' '.join([item['text'] for item in text_data])
-    result = qa_model(question=question, context=full_text)
-    answer_text = result['answer']
-    answer_score = result['score']
-    metadata = None
-    context = None
-    for item in text_data:
-        if answer_text in item['text']:
-            metadata = {
-                'page': item['page'],
-                'paragraph': item['paragraph'],
-                'line': item['line']
-            }
-            context = item['full_paragraph']
-            break
-    return {
-        'answer': answer_text,
-        'confidence': answer_score,
-        'metadata': metadata,
-        'context': context
-    }
-def display_chat_message(message, is_user=False):
-    """Display a chat message"""
-    message_type = "user" if is_user else "bot"
-    st.markdown(f"""
-        <div class="chat-message {message_type}">
-            <div class="message">{message['text']}</div>
-            {f"<div class='metadata'>{message['metadata']}</div>" if 'metadata' in message else ""}
-        </div>
-    """, unsafe_allow_html=True)
 def main():
     st.title("📚 PDF AI Chat")
-    # Initialize session state
-    if 'chat_history' not in st.session_state:
-        st.session_state.chat_history = []
-    if 'text_data' not in st.session_state:
-        st.session_state.text_data = None
-    # Load model
     try:
         qa_model = load_model()
     except Exception as e:
@@ -149,57 +143,59 @@ def main():
             except Exception as e:
                 st.error(f"Error processing PDF: {str(e)}")
                 return
-    # Display chat history
-    for message in st.session_state.chat_history:
-        display_chat_message(message, is_user=message['is_user'])
     # Chat input
-    with st.container():
-        st.markdown('<div class="chat-input">', unsafe_allow_html=True)
-        question = st.text_input("Ask a question about the document:", key="chat_input")
-        st.markdown('</div>', unsafe_allow_html=True)
-        if question:
-            # Add user question to chat history
-            st.session_state.chat_history.append({'text': question, 'is_user': True})
             with st.spinner("Finding answer..."):
-                try:
-                    result = find_answer(question, st.session_state.text_data, qa_model)
-                    # Create bot response
-                    bot_response = {
-                        'text': result['answer'],
-                        'metadata': f"Confidence: {result['confidence']:.2%} | Page: {result['metadata']['page']}, "
-                                    f"Paragraph: {result['metadata']['paragraph']}, Line: {result['metadata']['line']}",
-                        'is_user': False
-                    }
-                    # Add bot response to chat history
-                    st.session_state.chat_history.append(bot_response)
-                    # Force a rerun to update the chat display
-                    st.experimental_rerun()
-                except Exception as e:
-                    st.error(f"Error finding answer: {str(e)}")
-    # Instructions
-    if not pdf_file:
         st.markdown("""
             ### Instructions:
             1. Upload a PDF document using the file uploader above
             2. Wait for the document to be processed
             3. Start asking questions about the document
-            4. Get detailed answers with page numbers and confidence scores
             ### Features:
-            - Chat-like interface for asking multiple questions
-            - Extracts answers from PDF documents
-            - Provides page numbers and line information
-            - Shows confidence scores
-            - Handles multiple page documents
         """)
 if __name__ == "__main__":

     layout="wide"
 )
+# Custom CSS with improved styling
 st.markdown("""
     <style>
     .stApp {
         margin-bottom: 1rem;
         display: flex;
         flex-direction: column;
+        color: #ffffff;
     }
     .chat-message.user {
         background-color: #2b313e;
     }
+    .chat-message.assistant {
         background-color: #475063;
     }
+    .chat-message .content {
+        display: flex;
+        margin-bottom: 0.5rem;
+        padding: 1rem;
+        border-radius: 0.5rem;
     }
     .chat-message .metadata {
         font-size: 0.85rem;
+        color: #a8a8a8;
         margin-top: 0.5rem;
     }
     .chat-input {
         padding: 1rem;
         background-color: #262730;
     }
+    .source-info {
+        font-size: 0.8rem;
+        color: #666;
+        margin-top: 0.5rem;
+        padding: 0.5rem;
+        background-color: #f0f2f6;
+        border-radius: 0.3rem;
+    }
     </style>
 """, unsafe_allow_html=True)
+# Initialize session state
+if 'messages' not in st.session_state:
+    st.session_state.messages = []
+if 'text_data' not in st.session_state:
+    st.session_state.text_data = None
 @st.cache_resource
 def load_model():
     return pipeline(
         "question-answering",
         model="deepset/roberta-base-squad2",
     )
 def extract_text_with_metadata(pdf_file):
     text_data = []
     with pdfplumber.open(pdf_file) as pdf:
     return text_data
 def find_answer(question, text_data, qa_model):
     """Answer *question* from the extracted text and report where it came from.

     All ``item['text']`` values are joined with single spaces into one
     context string and passed to ``qa_model``. The answer is then traced
     back to the item it was read from.

     Args:
         question: The user's question.
         text_data: Sequence of dicts with the keys ``'text'``, ``'page'``,
             ``'paragraph'``, ``'line'`` and ``'full_paragraph'``.
         qa_model: Callable invoked as ``qa_model(question=..., context=...)``
             that returns a mapping with ``'answer'`` and ``'score'`` and,
             optionally, ``'start'`` (character offset of the answer within
             the context).

     Returns:
         A dict with the keys ``'answer'``, ``'confidence'``, ``'page'``,
         ``'paragraph'``, ``'line'`` and ``'context'``; or ``None`` when the
         answer cannot be tied to any item or when an error occurred (the
         error is reported through ``st.error``).
     """
     full_text = ' '.join([item['text'] for item in text_data])
     try:
         result = qa_model(question=question, context=full_text)
         answer_text = result['answer']
         answer_score = result['score']
         source = None
         # Prefer the model's own character offset: a plain substring search
         # returns the first item that merely contains the same words, which
         # mislabels the source whenever the phrase occurs more than once.
         start = result.get('start')
         if start is not None:
             offset = 0
             for item in text_data:
                 end = offset + len(item['text'])
                 if offset <= start < end:
                     source = item
                     break
                 # +1 skips the single-space separator used to build full_text.
                 offset = end + 1
         if source is None:
             # No usable offset: fall back to the first item containing the
             # answer text verbatim.
             source = next(
                 (item for item in text_data if answer_text in item['text']),
                 None,
             )
         if source is None:
             return None
         return {
             'answer': answer_text,
             'confidence': answer_score,
             'page': source['page'],
             'paragraph': source['paragraph'],
             'line': source['line'],
             'context': source['full_paragraph'],
         }
     except Exception as e:
         st.error(f"Error processing question: {str(e)}")
         return None
 def main():
     st.title("📚 PDF AI Chat")
     try:
         qa_model = load_model()
     except Exception as e:
             except Exception as e:
                 st.error(f"Error processing PDF: {str(e)}")
                 return
+    # Display chat messages
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.write(message["content"])
+            if "metadata" in message:
+                st.markdown(f"""
+                    <div class="source-info">
+                        Source: Page {message['metadata']['page']},
+                        Paragraph {message['metadata']['paragraph']},
+                        Line {message['metadata']['line']}
+                        <br>Confidence: {message['metadata']['confidence']:.2%}
+                    </div>
+                """, unsafe_allow_html=True)
     # Chat input
+    if st.session_state.text_data:
+        if question := st.chat_input("Ask a question about the document"):
+            # Add user message
+            st.session_state.messages.append({"role": "user", "content": question})
+            # Generate answer
             with st.spinner("Finding answer..."):
+                result = find_answer(question, st.session_state.text_data, qa_model)
+                if result:
+                    # Add assistant message
+                    st.session_state.messages.append({
+                        "role": "assistant",
+                        "content": result['answer'],
+                        "metadata": {
+                            "page": result['page'],
+                            "paragraph": result['paragraph'],
+                            "line": result['line'],
+                            "confidence": result['confidence']
+                        }
+                    })
+                    # Rerun to update chat display
+                    st.rerun()
+    else:
         st.markdown("""
             ### Instructions:
             1. Upload a PDF document using the file uploader above
             2. Wait for the document to be processed
             3. Start asking questions about the document
+            4. Get detailed answers with source information
             ### Features:
+            - Chat-like interface
+            - Source tracking
+            - Confidence scores
+            - Context preservation
         """)
 if __name__ == "__main__":