Spaces:

sidbhasin
/

PDF_Answer_AI_By_Syncmerce

Sleeping

App Files Files Community

sidbhasin committed on Nov 12, 2024

Commit

18b6637

verified ·

1 Parent(s): 8079882

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -65

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import textwrap
 # Set page config
 st.set_page_config(
-    page_title="PDF Question Answering System",
     page_icon="📚",
     layout="wide"
 )
@@ -16,32 +16,39 @@ st.set_page_config(
 # Custom CSS
 st.markdown("""
     <style>
-       .stApp {
         max-width: 1200px;
         margin: 0 auto;
     }
-    .answer-box {
-        padding: 20px;
-        background-color: #f8f9fa;
-        border-radius: 8px;
-        margin: 10px 0;
-        border-left: 4px solid #1f77b4;
     }
-    .metadata-box {
-        font-size: 0.9em;
-        color: #666;
-        margin-top: 10px;
-        padding-top: 10px;
-        border-top: 1px solid #eee;
     }
-    .context-box {
-        padding: 15px;
-        background-color: #fff;
-        border: 1px solid #ddd;
-        border-radius: 4px;
-        margin-top: 10px;
-        font-size: 0.9em;
-        color: #000;  /* Set text color to black */
     }
     </style>
 """, unsafe_allow_html=True)
@@ -63,13 +70,9 @@ def extract_text_with_metadata(pdf_file):
         for page_num, page in enumerate(pdf.pages, 1):
             text = page.extract_text()
             if text:
-                # Split text into paragraphs
                 paragraphs = text.split('\n\n')
-                # Process each paragraph
                 for para_num, paragraph in enumerate(paragraphs, 1):
                     if paragraph.strip():
-                        # Split paragraph into lines
                         lines = paragraph.split('\n')
                         for line_num, line in enumerate(lines, 1):
                             text_data.append({
@@ -79,22 +82,16 @@ def extract_text_with_metadata(pdf_file):
                                 'line': line_num,
                                 'full_paragraph': paragraph.strip()
                             })
     return text_data
 def find_answer(question, text_data, qa_model):
     """Find answer in the text with context and metadata"""
-    # Combine text data for QA model
     full_text = ' '.join([item['text'] for item in text_data])
-    # Get answer from model
     result = qa_model(question=question, context=full_text)
-    # Find the text segment containing the answer
     answer_text = result['answer']
     answer_score = result['score']
-    # Find metadata for the answer
     metadata = None
     context = None
@@ -115,8 +112,24 @@ def find_answer(question, text_data, qa_model):
         'context': context
     }
 def main():
-    st.title("📚 PDF Question Answering System")
     # Load model
     try:
@@ -128,65 +141,64 @@ def main():
     # File upload
     pdf_file = st.file_uploader("Upload PDF Document", type=['pdf'])
-    if pdf_file:
-        # Extract text with metadata
         with st.spinner("Processing PDF..."):
             try:
-                text_data = extract_text_with_metadata(pdf_file)
-                st.session_state.text_data = text_data
                 st.success("PDF processed successfully!")
             except Exception as e:
                 st.error(f"Error processing PDF: {str(e)}")
                 return
-        # Question input
-        question = st.text_input("Ask a question about the document:")
         if question:
             with st.spinner("Finding answer..."):
                 try:
                     result = find_answer(question, st.session_state.text_data, qa_model)
-                    # Display answer with metadata
-                    st.markdown("### Answer")
-                    st.markdown(f"""
-                        <div class="answer-box">
-                            <div>{result['answer']}</div>
-                            <div class="metadata-box">
-                                <strong>Confidence:</strong> {result['confidence']:.2%}<br>
-                                <strong>Location:</strong> Page {result['metadata']['page']},
-                                Paragraph {result['metadata']['paragraph']},
-                                Line {result['metadata']['line']}
-                            </div>
-                        </div>
-                        """, unsafe_allow_html=True)
-                    # Display context
-                    if result['context']:
-                        st.markdown("### Context")
-                        st.markdown(f"""
-                            <div class="context-box">
-                                {result['context']}
-                            </div>
-                        """, unsafe_allow_html=True)
                 except Exception as e:
                     st.error(f"Error finding answer: {str(e)}")
     # Instructions
-    else:
         st.markdown("""
             ### Instructions:
             1. Upload a PDF document using the file uploader above
             2. Wait for the document to be processed
-            3. Type your question in the text input
-            4. Get detailed answers with page numbers and context
             ### Features:
             - Extracts answers from PDF documents
             - Provides page numbers and line information
             - Shows confidence scores
-            - Displays relevant context
             - Handles multiple page documents
         """)

 # Set page config
 st.set_page_config(
+    page_title="PDF AI Chat",
     page_icon="📚",
     layout="wide"
 )
 # Custom CSS
 st.markdown("""
     <style>
+    .stApp {
         max-width: 1200px;
         margin: 0 auto;
     }
+    .chat-message {
+        padding: 1.5rem;
+        border-radius: 0.5rem;
+        margin-bottom: 1rem;
+        display: flex;
+        flex-direction: column;
     }
+    .chat-message.user {
+        background-color: #2b313e;
     }
+    .chat-message.bot {
+        background-color: #475063;
+    }
+    .chat-message .message {
+        color: #ffffff;
+        font-size: 1.1rem;
+    }
+    .chat-message .metadata {
+        color: #a8a8a8;
+        font-size: 0.85rem;
+        margin-top: 0.5rem;
+    }
+    .chat-input {
+        position: fixed;
+        bottom: 0;
+        left: 0;
+        right: 0;
+        padding: 1rem;
+        background-color: #262730;
     }
     </style>
 """, unsafe_allow_html=True)
         for page_num, page in enumerate(pdf.pages, 1):
             text = page.extract_text()
             if text:
                 paragraphs = text.split('\n\n')
                 for para_num, paragraph in enumerate(paragraphs, 1):
                     if paragraph.strip():
                         lines = paragraph.split('\n')
                         for line_num, line in enumerate(lines, 1):
                             text_data.append({
                                 'line': line_num,
                                 'full_paragraph': paragraph.strip()
                             })
     return text_data
 def find_answer(question, text_data, qa_model):
     """Find answer in the text with context and metadata"""
     full_text = ' '.join([item['text'] for item in text_data])
     result = qa_model(question=question, context=full_text)
     answer_text = result['answer']
     answer_score = result['score']
     metadata = None
     context = None
         'context': context
     }
+def display_chat_message(message, is_user=False):
+    """Display a chat message"""
+    message_type = "user" if is_user else "bot"
+    st.markdown(f"""
+        <div class="chat-message {message_type}">
+            <div class="message">{message['text']}</div>
+            {f"<div class='metadata'>{message['metadata']}</div>" if 'metadata' in message else ""}
+        </div>
+    """, unsafe_allow_html=True)
 def main():
+    st.title("📚 PDF AI Chat")
+    # Initialize session state
+    if 'chat_history' not in st.session_state:
+        st.session_state.chat_history = []
+    if 'text_data' not in st.session_state:
+        st.session_state.text_data = None
     # Load model
     try:
     # File upload
     pdf_file = st.file_uploader("Upload PDF Document", type=['pdf'])
+    if pdf_file and not st.session_state.text_data:
         with st.spinner("Processing PDF..."):
             try:
+                st.session_state.text_data = extract_text_with_metadata(pdf_file)
                 st.success("PDF processed successfully!")
             except Exception as e:
                 st.error(f"Error processing PDF: {str(e)}")
                 return
+    # Display chat history
+    for message in st.session_state.chat_history:
+        display_chat_message(message, is_user=message['is_user'])
+    # Chat input
+    with st.container():
+        st.markdown('<div class="chat-input">', unsafe_allow_html=True)
+        question = st.text_input("Ask a question about the document:", key="chat_input")
+        st.markdown('</div>', unsafe_allow_html=True)
         if question:
+            # Add user question to chat history
+            st.session_state.chat_history.append({'text': question, 'is_user': True})
             with st.spinner("Finding answer..."):
                 try:
                     result = find_answer(question, st.session_state.text_data, qa_model)
+                    # Create bot response
+                    bot_response = {
+                        'text': result['answer'],
+                        'metadata': f"Confidence: {result['confidence']:.2%} | Page: {result['metadata']['page']}, "
+                                    f"Paragraph: {result['metadata']['paragraph']}, Line: {result['metadata']['line']}",
+                        'is_user': False
+                    }
+                    # Add bot response to chat history
+                    st.session_state.chat_history.append(bot_response)
+                    # Force a rerun to update the chat display
+                    st.experimental_rerun()
                 except Exception as e:
                     st.error(f"Error finding answer: {str(e)}")
     # Instructions
+    if not pdf_file:
         st.markdown("""
             ### Instructions:
             1. Upload a PDF document using the file uploader above
             2. Wait for the document to be processed
+            3. Start asking questions about the document
+            4. Get detailed answers with page numbers and confidence scores
             ### Features:
+            - Chat-like interface for asking multiple questions
             - Extracts answers from PDF documents
             - Provides page numbers and line information
             - Shows confidence scores
             - Handles multiple page documents
         """)