Spaces:

chiichann
/

pdf_document_analyzer

Running

App Files Files Community

chiichann committed on Feb 18, 2025

Commit

12ffdf7

1 Parent(s): 93c282f

first sync with remote code

Browse files

Files changed (2) hide show

app.py +114 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,114 @@

+import os
+import google.generativeai as genai
+import streamlit as st
+from PyPDF2 import PdfReader
+from collections import Counter
+import re
+# Read the Gemini API key from the environment so it never lives in the source.
+api_key = os.getenv("GEMINI_API_KEY")
+# Treat an empty value the same as a missing one: an empty key cannot
+# authenticate, so report it here instead of failing on the first request.
+if not api_key:
+    st.error("API key not found. Please set the GEMINI_API_KEY environment variable.")
+else:
+    # Gemini Model Initialization
+    MODEL_ID = "gemini-2.0-flash-exp"
+    genai.configure(api_key=api_key)
+    model = genai.GenerativeModel(MODEL_ID)
+    # NOTE(review): Streamlit re-executes this script on every interaction, so
+    # this starts a new chat session on each run and no conversation history
+    # is carried between questions - confirm that is intended.
+    chat = model.start_chat()
+    st.title("📚 AI-Powered Document Analyzer")
+    with st.expander("📖 **What is this app about?**"):
+        st.write("""
+        The **AI-Powered Document Analyzer** app is an AI-powered tool designed to help users extract valuable insights from any PDF document.
+        By leveraging **Gemini 2.0's Flash Experimental Model**, this intelligent system allows users to interactively engage with their documents,
+        making research and information retrieval more efficient.
+        """)
+    # Upload Section: only PDF files are accepted by the uploader.
+    st.header("Upload Document")
+    uploaded_file = st.file_uploader("Upload a PDF file to be analyzed", type=["pdf"])
+    def extract_text_from_pdf(file):
+        """Return the text of every page in a PDF, joined with newlines.
+
+        Args:
+            file: Anything ``PdfReader`` accepts; the app passes the object
+                returned by ``st.file_uploader``.
+
+        Returns:
+            The page texts joined by ``"\\n"``. Pages whose extracted text is
+            empty or ``None`` are skipped, so the result is ``""`` when no
+            page yields any text.
+        """
+        pdf_reader = PdfReader(file)
+        # Extract each page exactly once: calling extract_text() both in the
+        # filter and for the value would parse every page twice.
+        page_texts = (page.extract_text() for page in pdf_reader.pages)
+        return "\n".join(text for text in page_texts if text)
+    def extract_keywords(text, num_keywords=10, *, min_length=4):
+        """Return the most frequent words of ``text``, most common first.
+
+        The text is lower-cased and split into runs of word characters
+        (letters, digits and underscores); runs shorter than ``min_length``
+        and a small set of stop words are ignored.
+
+        Args:
+            text: The text to analyse.
+            num_keywords: Maximum number of keywords to return.
+            min_length: Minimum number of characters a word needs to be
+                counted. Defaults to 4, which matches the previous
+                hard-coded behaviour.
+
+        Returns:
+            A list of at most ``num_keywords`` words ordered by descending
+            frequency; empty when no word qualifies.
+
+        Raises:
+            ValueError: If ``min_length`` is smaller than 1.
+        """
+        if min_length < 1:
+            raise ValueError("min_length must be at least 1")
+        # \w also matches digits and underscores, so tokens such as "2024" count as words.
+        words = re.findall(rf'\b\w{{{min_length},}}\b', text.lower())
+        # Stop words; the three-letter entries only take effect when min_length <= 3.
+        common_words = set("the and for with from this that have will are was were been has".split())
+        word_counts = Counter(word for word in words if word not in common_words)
+        return [word for word, _ in word_counts.most_common(num_keywords)]
+    def generate_suggested_questions(keywords):
+        """Build two sample questions per keyword, keeping keyword order.
+
+        For each keyword the "significance" question comes first, followed
+        by the "summarize" question; an empty input yields an empty list.
+        """
+        return [
+            question
+            for keyword in keywords
+            for question in (
+                f"What is the significance of {keyword} in the document?",
+                f"Can you summarize the document's section on {keyword}?",
+            )
+        ]
+    if uploaded_file:
+        # Parsing can fail on corrupt or encrypted PDFs; report the problem in
+        # the UI instead of letting the exception abort the whole page.
+        try:
+            document_text = extract_text_from_pdf(uploaded_file)
+        except Exception as e:
+            document_text = None
+            st.error(f"Could not read the PDF: {e}")
+        if document_text and document_text.strip():
+            st.session_state["document_text"] = document_text
+            st.success("Document uploaded successfully!")
+            # Display Keyword Insights
+            st.header("🔑 Key Topic Insights")
+            keywords = extract_keywords(document_text)
+            st.write(", ".join(keywords))
+            # Generate Suggested Questions
+            st.session_state["suggested_questions"] = generate_suggested_questions(keywords)
+        else:
+            # Nothing usable was extracted: drop state left over from a previous
+            # document so the Q&A section is not shown for stale or empty text.
+            st.session_state.pop("document_text", None)
+            st.session_state.pop("suggested_questions", None)
+            if document_text is not None:
+                st.warning("No extractable text was found in this PDF.")
+    else:
+        st.session_state.pop("document_text", None)  # Remove document text if no file is uploaded
+        st.session_state.pop("suggested_questions", None)
+    # Question-Answering Section
+    if "document_text" in st.session_state:
+        st.header("Ask AI About Your Document")
+        # Make sure the key exists before the text input below reads it
+        if "selected_question" not in st.session_state:
+            st.session_state["selected_question"] = ""
+        def ask_ai(question):
+            """Send ``question`` plus the start of the stored document to the chat model.
+
+            Only the first 5000 characters of the document text held in the
+            session state are included in the prompt. Returns the model's
+            reply text, or an ``"Error: ..."`` string if anything raises.
+            """
+            try:
+                # Cap the excerpt so the prompt stays bounded for long documents
+                excerpt = st.session_state['document_text'][:5000]
+                prompt = f"Analyze the following document and answer: {question}\n\nDocument Content:\n{excerpt}"
+                reply = chat.send_message(prompt)
+            except Exception as e:
+                return f"Error: {e}"
+            else:
+                return reply.text
+        # Text input for entering a question; pre-filled with the last suggestion
+        # the user clicked (empty until then).
+        selected_question = st.text_input(
+            "Enter your question about the document contents:",
+            value=st.session_state["selected_question"]
+        )
+        def _select_question(question):
+            """Remember a clicked suggestion so the text input is pre-filled with it."""
+            st.session_state["selected_question"] = question
+        # Suggested Questions Section (between input and button)
+        # Limit to 5 questions, shown side by side with one column each
+        limited_suggested_questions = st.session_state.get("suggested_questions", [])[:5]
+        # st.columns() needs at least one column, so skip the whole section when
+        # no suggestions exist (e.g. the document produced no keywords).
+        if limited_suggested_questions:
+            st.write("💡 **Suggested Questions:**")
+            cols = st.columns(len(limited_suggested_questions))
+            for i, (col, question) in enumerate(zip(cols, limited_suggested_questions)):
+                with col:
+                    # An on_click callback runs before the rerun triggered by the
+                    # click, so the text input above already shows the chosen
+                    # question on that run instead of one interaction later.
+                    st.button(
+                        f"🔹 {question}",
+                        key=f"btn_{i}",
+                        on_click=_select_question,
+                        args=(question,),
+                    )
+        # Generate Answer Button: does nothing while the question field is empty
+        if st.button("Generate Answer") and selected_question:
+            with st.spinner("AI is reading the document..."):
+                response = ask_ai(selected_question)
+                st.markdown(f"**Response:** \n {response}")
+    else:
+        st.warning("Please upload a document to proceed.")

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+streamlit
+google-generativeai
+PyPDF2