Spaces:

sidbhasin
/

PDF_Answer_AI_By_Syncmerce

Sleeping

App Files Files Community

sidbhasin committed on Nov 12, 2024

Commit

3f21fcc

verified ·

1 Parent(s): 633407a

Update app.py

Browse files

Files changed (1) hide show

app.py +111 -132

app.py CHANGED Viewed

@@ -12,48 +12,33 @@ st.set_page_config(
     layout="wide"
 )
-# Custom CSS for better styling
 st.markdown("""
     <style>
     .chat-container {
-        display: flex;
-        flex-direction: column;
-        gap: 20px;
         padding: 20px;
-        height: calc(100vh - 200px);
-        overflow-y: auto;
     }
-    .message-container {
-        display: flex;
-        flex-direction: column;
-        gap: 10px;
         padding: 15px;
         border-radius: 10px;
-        max-width: 90%;
-    }
-    .user-message {
-        background-color: #2b313e;
-        color: white;
-        align-self: flex-end;
     }
     .assistant-message {
         background-color: #f0f2f6;
-        color: black;
-        align-self: flex-start;
     }
     .source-info {
         font-size: 0.8em;
         color: #666;
         border-top: 1px solid #ddd;
-        margin-top: 10px;
-        padding-top: 10px;
-    }
-    .context-box {
-        background-color: #f8f9fa;
-        border-left: 3px solid #1f77b4;
-        padding: 10px;
-        margin-top: 10px;
-        font-size: 0.9em;
     }
     .chat-input {
         position: fixed;
@@ -64,19 +49,29 @@ st.markdown("""
         background: white;
         border-top: 1px solid #ddd;
     }
     </style>
 """, unsafe_allow_html=True)
 @st.cache_resource
-def load_qa_model():
     return pipeline(
         "question-answering",
         model="deepset/roberta-base-squad2",
         tokenizer="deepset/roberta-base-squad2"
     )
-def process_pdf(pdf_file):
     text_data = []
     with pdfplumber.open(pdf_file) as pdf:
         for page_num, page in enumerate(pdf.pages, 1):
             text = page.extract_text()
@@ -92,50 +87,47 @@ def process_pdf(pdf_file):
                         })
     return text_data
-def find_best_answer(question, text_data, qa_model):
     best_answer = None
     max_score = 0
-    relevant_context = []
-    for chunk in text_data:
-        try:
-            result = qa_model(
-                question=question,
-                context=chunk['text'],
-                max_answer_len=100
-            )
-            if result['score'] > max_score:
-                max_score = result['score']
-                best_answer = {
-                    'answer': result['answer'],
                     'confidence': result['score'],
-                    'page': chunk['page'],
-                    'paragraph': chunk['paragraph'],
-                    'context': chunk['context']
                 }
-            # Collect relevant contexts
-            if result['score'] > 0.1:  # Threshold for relevance
-                relevant_context.append(chunk['context'])
-        except Exception as e:
-            continue
-    return best_answer, relevant_context[:3]  # Return top 3 relevant contexts
 def main():
-    st.title("📚 Advanced PDF Question Answering")
-    # Initialize session state
-    if 'messages' not in st.session_state:
-        st.session_state.messages = []
-    if 'pdf_data' not in st.session_state:
-        st.session_state.pdf_data = None
-    # Load QA model
     try:
-        qa_model = load_qa_model()
     except Exception as e:
         st.error(f"Error loading model: {str(e)}")
         return
@@ -143,91 +135,78 @@ def main():
     # File upload
     pdf_file = st.file_uploader("Upload PDF Document", type=['pdf'])
-    if pdf_file and not st.session_state.pdf_data:
         with st.spinner("Processing PDF..."):
             try:
-                st.session_state.pdf_data = process_pdf(pdf_file)
-                st.success("PDF processed successfully! You can now ask questions.")
             except Exception as e:
                 st.error(f"Error processing PDF: {str(e)}")
                 return
-    # Chat interface
-    st.markdown('<div class="chat-container">', unsafe_allow_html=True)
-    # Display chat history
-    for message in st.session_state.messages:
-        if message["role"] == "user":
-            st.markdown(f"""
-                <div class="message-container user-message">
-                    {message["content"]}
-                </div>
-            """, unsafe_allow_html=True)
-        else:
-            st.markdown(f"""
-                <div class="message-container assistant-message">
-                    <div>{message["content"]}</div>
-                    <div class="source-info">
-                        Source: Page {message["metadata"]["page"]},
-                        Paragraph {message["metadata"]["paragraph"]}
-                        (Confidence: {message["metadata"]["confidence"]:.1%})
                     </div>
-                    <div class="context-box">
-                        {message["metadata"]["context"]}
-                    </div>
-                </div>
-            """, unsafe_allow_html=True)
-    st.markdown('</div>', unsafe_allow_html=True)
-    # Question input
-    if st.session_state.pdf_data:
-        question = st.text_input("Ask a question about the document:", key="question_input")
-        if question:
-            # Add user question to chat history
-            st.session_state.messages.append({"role": "user", "content": question})
-            # Generate answer
-            with st.spinner("Finding answer..."):
-                answer, relevant_contexts = find_best_answer(
-                    question,
-                    st.session_state.pdf_data,
-                    qa_model
-                )
-                if answer:
-                    # Add assistant response to chat history
-                    st.session_state.messages.append({
-                        "role": "assistant",
-                        "content": answer["answer"],
-                        "metadata": {
-                            "page": answer["page"],
-                            "paragraph": answer["paragraph"],
-                            "confidence": answer["confidence"],
-                            "context": answer["context"]
-                        }
-                    })
-                    # Force refresh
-                    st.rerun()
-                else:
-                    st.error("Sorry, I couldn't find a relevant answer in the document.")
     else:
         st.markdown("""
             ### Instructions:
             1. Upload a PDF document using the file uploader above
             2. Wait for the document to be processed
-            3. Start asking questions about the content
-            4. Get detailed answers with source information and context
             ### Features:
-            - Natural conversation interface
-            - Source tracking with page numbers
-            - Confidence scores
-            - Relevant context display
-            - Multiple question support
         """)
 if __name__ == "__main__":

     layout="wide"
 )
+# Custom CSS for better chat interface
 st.markdown("""
     <style>
     .chat-container {
+        border-radius: 10px;
+        margin-bottom: 20px;
         padding: 20px;
     }
+    .user-message {
+        background-color: #e6f3ff;
         padding: 15px;
         border-radius: 10px;
+        margin: 10px 0;
+        text-align: right;
     }
     .assistant-message {
         background-color: #f0f2f6;
+        padding: 15px;
+        border-radius: 10px;
+        margin: 10px 0;
     }
     .source-info {
         font-size: 0.8em;
         color: #666;
+        margin-top: 5px;
+        padding-top: 5px;
         border-top: 1px solid #ddd;
     }
     .chat-input {
         position: fixed;
         background: white;
         border-top: 1px solid #ddd;
     }
+    .main {
+        margin-bottom: 100px;  /* Space for fixed chat input */
+    }
     </style>
 """, unsafe_allow_html=True)
# Seed per-session defaults on first run: the chat history list and the
# slot that later holds the extracted PDF text.
if "messages" not in st.session_state:
    st.session_state["messages"] = []
if "text_data" not in st.session_state:
    st.session_state["text_data"] = None
 @st.cache_resource
def load_model():
    """Build and return the question-answering pipeline used by the app.

    The same checkpoint name is used for both the model and its tokenizer.
    """
    checkpoint = "deepset/roberta-base-squad2"
    qa_pipeline = pipeline(
        "question-answering",
        model=checkpoint,
        tokenizer=checkpoint,
    )
    return qa_pipeline
+def extract_text_with_metadata(pdf_file):
     text_data = []
     with pdfplumber.open(pdf_file) as pdf:
         for page_num, page in enumerate(pdf.pages, 1):
             text = page.extract_text()
                         })
     return text_data
def find_answer(question, text_data, qa_model):
    """Answer *question* from the extracted text and report where it was found.

    The paragraph texts are joined with single spaces into one context and
    passed to ``qa_model``. The answer is attributed to the paragraph that
    contains the model's reported start offset; when no usable offset is
    returned, the first paragraph containing the answer string is used, and
    failing that the first paragraph in ``text_data``.

    Args:
        question: Question text forwarded to the model.
        text_data: Sequence of dicts, each with ``'text'``, ``'page'`` and
            ``'paragraph'`` keys.
        qa_model: Callable invoked as ``qa_model(question=..., context=...)``
            that returns a mapping with ``'answer'`` and ``'score'`` and,
            optionally, ``'start'`` (character offset of the answer within
            the context).

    Returns:
        A dict with ``'answer'``, ``'confidence'``, ``'page'``,
        ``'paragraph'`` and ``'context'`` keys, or ``None`` when there is no
        text to search or the model call fails (an error is shown through
        ``st.error`` in both cases).
    """
    if not text_data:
        # Nothing to search: avoid calling the model with an empty context
        # and indexing into an empty list below.
        st.error("Error finding answer: no text available to search.")
        return None

    separator = ' '
    full_text = separator.join(item['text'] for item in text_data)
    try:
        result = qa_model(question=question, context=full_text)
        answer_text = result['answer']

        # Prefer the character offset reported by the model: a substring
        # search alone picks the wrong paragraph whenever the same answer
        # string also occurs earlier in the document.
        source = None
        start = result.get('start')
        if start is not None:
            offset = 0
            for item in text_data:
                end = offset + len(item['text'])
                if offset <= start < end:
                    source = item
                    break
                offset = end + len(separator)

        if source is None:
            # No usable offset: first paragraph containing the answer text,
            # otherwise the first paragraph, so that page, paragraph and
            # context always describe the same item.
            source = next(
                (item for item in text_data if answer_text in item['text']),
                text_data[0],
            )

        return {
            'answer': answer_text,
            'confidence': result['score'],
            'page': source['page'],
            'paragraph': source['paragraph'],
            'context': source['text'],
        }
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        st.error(f"Error finding answer: {str(e)}")
        return None
 def main():
     """Render the PDF chat page: upload, chat history and question form.

     Flow on each script run:
       1. Load the question-answering model; stop with an error on failure.
       2. If a PDF is uploaded that has not been processed in this session,
          extract its text, store it in ``st.session_state.text_data`` and
          reset the chat history.
       3. If extracted text is available, show the stored messages and a
          question form; otherwise show the usage instructions.

     A submitted question is appended to ``st.session_state.messages``,
     answered with ``find_answer`` and, when an answer is returned, the
     answer is appended as well and the script is rerun to display it.
     """
     import html  # local import: only needed to escape chat text below

     st.title("📚 PDF Chat Assistant")
     try:
         qa_model = load_model()
     except Exception as e:
         st.error(f"Error loading model: {str(e)}")
         return

     # File upload
     pdf_file = st.file_uploader("Upload PDF Document", type=['pdf'])
     if pdf_file is not None:
         # Identify the upload by name and size so that replacing the
         # document triggers re-processing instead of being ignored.
         upload_id = (pdf_file.name, pdf_file.size)
         if st.session_state.get("pdf_id") != upload_id:
             with st.spinner("Processing PDF..."):
                 try:
                     st.session_state.text_data = extract_text_with_metadata(pdf_file)
                 except Exception as e:
                     st.error(f"Error processing PDF: {str(e)}")
                     return
             st.session_state.pdf_id = upload_id
             # Earlier answers refer to the previous document.
             st.session_state.messages = []
             st.success("PDF processed successfully! You can now ask questions below.")

     # Display chat interface if PDF is processed
     if st.session_state.text_data:
         # Chat history
         st.markdown('<div class="chat-container">', unsafe_allow_html=True)
         for message in st.session_state.messages:
             # Questions are typed by the user and answers are copied from
             # the PDF; escape both before embedding them in raw HTML.
             safe_content = html.escape(str(message["content"]))
             if message["role"] == "user":
                 st.markdown(
                     f'<div class="user-message">{safe_content}</div>',
                     unsafe_allow_html=True,
                 )
             else:
                 meta = message["metadata"]
                 # Built without leading indentation so Markdown cannot
                 # interpret the markup as an indented code block.
                 st.markdown(
                     '<div class="assistant-message">'
                     f'<div>{safe_content}</div>'
                     '<div class="source-info">'
                     f'Source: Page {meta["page"]}, '
                     f'Paragraph {meta["paragraph"]} '
                     f'(Confidence: {meta["confidence"]:.1%})'
                     '</div>'
                     '</div>',
                     unsafe_allow_html=True,
                 )
         st.markdown('</div>', unsafe_allow_html=True)

         # Chat input
         with st.container():
             st.markdown('<div class="chat-input">', unsafe_allow_html=True)
             # A form with clear_on_submit empties the box after each
             # submission. A bare text_input keeps its value across
             # st.rerun(), which re-submitted the same question on every
             # rerun and never terminated.
             with st.form("question_form", clear_on_submit=True):
                 question = st.text_input("Ask a question about the document:", key="question_input")
                 submitted = st.form_submit_button("Ask")
             st.markdown('</div>', unsafe_allow_html=True)

             if submitted and question.strip():
                 # Add user question to chat history
                 st.session_state.messages.append({"role": "user", "content": question})
                 # Get answer
                 with st.spinner("Finding answer..."):
                     answer = find_answer(question, st.session_state.text_data, qa_model)
                 if answer:
                     # Add assistant response to chat history
                     st.session_state.messages.append({
                         "role": "assistant",
                         "content": answer["answer"],
                         "metadata": {
                             "page": answer["page"],
                             "paragraph": answer["paragraph"],
                             "confidence": answer["confidence"],
                             "context": answer["context"],
                         },
                     })
                     # Rerun to update chat display
                     st.rerun()
     else:
         st.markdown("""
             ### Instructions:
             1. Upload a PDF document using the file uploader above
             2. Wait for the document to be processed
             3. Use the chat interface to ask questions
             4. Get answers with source information
             ### Features:
             - Chat-like interface
             - Source tracking
             - Context preservation
             - Multiple questions support
         """)
 if __name__ == "__main__":