Spaces:

jasvir-singh1021
/

Easy-data-parser

Sleeping

App Files Files Community

jasvir-singh1021 committed on Jul 27, 2025

Commit

aa87ef2

verified ·

1 Parent(s): 3d4310e

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +79 -79

src/streamlit_app.py CHANGED Viewed

@@ -1,115 +1,115 @@
 import streamlit as st
 import json
-from datetime import datetime
-# Page config
-st.set_page_config(page_title="Document Parser", layout="wide", page_icon="📄")
-# Initialize state
 if "conversation" not in st.session_state:
     st.session_state.conversation = []
-if "last_question" not in st.session_state:
-    st.session_state.last_question = None
-# Sidebar settings
 with st.sidebar:
     st.title("⚙️ Settings")
-    api_key = st.text_input("🔑 OpenAI API Key", type="password", help="Paste your OpenAI API key")
-    temperature = st.slider("🔥 Model Temperature", 0.0, 1.0, 0.3, 0.05, help="Higher values make responses more creative.")
-    st.markdown("---")
-    st.caption("Built with ❤️ using Streamlit")
-# Title & instructions
 st.title("📄 Document Parser")
-st.markdown("Upload your documents and ask questions powered by GPT-4 and LlamaIndex (or mock engine).")
-# File upload
 uploaded_files = st.file_uploader(
-    "📤 Upload Documents (PDF, DOCX, TXT, etc.)",
-    type=["pdf", "docx", "doc", "txt", "rtf", "html"],
     accept_multiple_files=True
 )
-# File display
 if uploaded_files:
-    st.success(f"{len(uploaded_files)} document(s) uploaded.")
-    with st.expander("📚 Uploaded Files Overview"):
-        for file in uploaded_files:
-            st.write(f"• `{file.name}` ({round(file.size / 1024, 2)} KB)")
-        st.markdown("✅ Ready to ask questions.")
 else:
-    st.warning("⚠️ Please upload at least one document.")
-# Suggestive prompts
-if uploaded_files:
-    st.markdown("#### 💡 Suggested Questions")
-    suggestions = [
-        "What is the main topic of the documents?",
-        "Summarize the contents.",
-        "Are there any deadlines or dates mentioned?",
-        "What are the key takeaways?"
-    ]
-    for i, s in enumerate(suggestions):
-        if st.button(f"💬 {s}", key=f"suggestion_{i}"):
-            st.session_state.last_question = s
-# Text input
-question = st.text_input("🔎 Ask a question about your documents:", value=st.session_state.last_question or "")
-# Ask button
-ask_col, retry_col = st.columns([4, 1])
-with ask_col:
-    send = st.button("🚀 Ask")
-with retry_col:
-    retry = st.button("🔁 Retry")
-if (send or retry) and question and api_key and uploaded_files:
-    st.session_state.last_question = question
-    with st.spinner("Analyzing your documents..."):
-        # TODO: Replace this with actual LLM logic
-        mock_answer = f"🤖 Here's a simulated response to your question: **'{question}'**"
-        # Tag this session
-        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        # Store in conversation
-        st.session_state.conversation.append({
-            "role": "user",
-            "content": question,
-            "timestamp": timestamp
-        })
-        st.session_state.conversation.append({
-            "role": "assistant",
-            "content": mock_answer,
-            "timestamp": timestamp
-        })
-elif send or retry:
-    st.error("Please make sure you've uploaded documents and provided an API key.")
-# Conversation history
 if st.session_state.conversation:
-    st.markdown("## 🧾 Conversation History")
     for msg in st.session_state.conversation:
-        author = "🧑 You" if msg["role"] == "user" else "🤖 Assistant"
-        st.markdown(f"**{author}** *(at {msg['timestamp']})*:\n\n{msg['content']}", unsafe_allow_html=True)
-    # Actions
     st.markdown("---")
     col1, col2 = st.columns(2)
     with col1:
         if st.button("🗑️ Clear Conversation"):
             st.session_state.conversation = []
-            st.session_state.last_question = None
             st.experimental_rerun()
     with col2:
         format = st.selectbox("Download Format", ["TXT", "JSON"])
         if format == "TXT":
             content = "\n\n".join(
-                f"{msg['role'].capitalize()} ({msg['timestamp']}):\n{msg['content']}"
-                for msg in st.session_state.conversation
             )
             mime = "text/plain"
             filename = "conversation.txt"
@@ -118,4 +118,4 @@ if st.session_state.conversation:
             mime = "application/json"
             filename = "conversation.json"
-        st.download_button("📥 Download", content, file_name=filename, mime=mime)

 import streamlit as st
+import openai
 import json
+from PyPDF2 import PdfReader
+from docx import Document
+import html2text
+# Configure the page
+st.set_page_config(page_title="Document Parser", layout="wide")
+# Initialize session state
 if "conversation" not in st.session_state:
     st.session_state.conversation = []
+# Sidebar for API key and settings
 with st.sidebar:
     st.title("⚙️ Settings")
+    api_key = st.text_input("🔑 OpenAI API Key", type="password")
+    temperature = st.slider("🔥 Model Temperature", 0.0, 1.0, 0.3, 0.1)
+# Title
 st.title("📄 Document Parser")
+st.markdown("Upload documents and ask questions about their content using GPT-4.")
+# File uploader
 uploaded_files = st.file_uploader(
+    "📤 Upload Documents (PDF, DOCX, TXT, HTML)",
+    type=["pdf", "docx", "txt", "html"],
     accept_multiple_files=True
 )
+# Extract text from uploaded files
+def extract_text(file):
+    """Return the text content of one uploaded file.
+
+    Args:
+        file: An uploaded-file object exposing ``type`` (a MIME string),
+            ``name`` and ``read()``, as returned by ``st.file_uploader``.
+
+    Returns:
+        The extracted text. If reading or parsing fails, a bracketed
+        ``[Error reading <name>: <reason>]`` placeholder is returned
+        instead of raising, so one bad file does not stop the others
+        from being processed.
+    """
+    try:
+        if file.type == "application/pdf":
+            reader = PdfReader(file)
+            # Extract each page exactly once; pages that yield no text are skipped.
+            page_texts = (page.extract_text() for page in reader.pages)
+            return "\n".join(text for text in page_texts if text)
+        elif file.type in ["application/vnd.openxmlformats-officedocument.wordprocessingml.document", "application/msword"]:
+            doc = Document(file)
+            return "\n".join(p.text for p in doc.paragraphs)
+        elif file.type == "text/html":
+            # Replace undecodable bytes rather than discarding the whole file.
+            html = file.read().decode("utf-8", errors="replace")
+            return html2text.html2text(html)
+        else:
+            # Plain text (and any other type): tolerate non-UTF-8 bytes.
+            return file.read().decode("utf-8", errors="replace")
+    except Exception as e:
+        return f"[Error reading {file.name}: {e}]"
+# Combine all text
+all_text = ""
 if uploaded_files:
+    for file in uploaded_files:
+        all_text += f"\n--- {file.name} ---\n"
+        all_text += extract_text(file)
+    st.success(f"{len(uploaded_files)} document(s) processed.")
 else:
+    st.info("Please upload at least one document to continue.")
+# Question input
+question = st.text_input("💬 Ask a question about your documents:")
+# Send to OpenAI
+if st.button("🚀 Ask") and question and uploaded_files and api_key:
+    with st.spinner("Processing with GPT-4..."):
+        try:
+            openai.api_key = api_key
+            prompt = (
+                "You are a document assistant. Based on the following content, answer the question clearly.\n\n"
+                f"{all_text}\n\nQuestion: {question}"
+            )
+            response = openai.ChatCompletion.create(
+                model="gpt-4",
+                temperature=temperature,
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant that answers questions based on document content."},
+                    {"role": "user", "content": prompt}
+                ]
+            )
+            answer = response.choices[0].message.content.strip()
+            st.session_state.conversation.append({"role": "user", "content": question})
+            st.session_state.conversation.append({"role": "assistant", "content": answer})
+        except Exception as e:
+            st.error(f"Error: {e}")
+# Show conversation
 if st.session_state.conversation:
+    st.markdown("## 🧾 Conversation")
     for msg in st.session_state.conversation:
+        if msg["role"] == "user":
+            st.markdown(f"**You:** {msg['content']}")
+        else:
+            st.markdown(f"**Assistant:** {msg['content']}")
     st.markdown("---")
     col1, col2 = st.columns(2)
     with col1:
         if st.button("🗑️ Clear Conversation"):
             st.session_state.conversation = []
             st.experimental_rerun()
     with col2:
         format = st.selectbox("Download Format", ["TXT", "JSON"])
         if format == "TXT":
             content = "\n\n".join(
+                f"{msg['role'].capitalize()}:\n{msg['content']}" for msg in st.session_state.conversation
             )
             mime = "text/plain"
             filename = "conversation.txt"
             mime = "application/json"
             filename = "conversation.json"
+        # Streamlit's keyword for the suggested download name is `file_name`;
+        # passing `filename=` raises TypeError (unexpected keyword argument).
+        st.download_button("📥 Download", content, file_name=filename, mime=mime)