Spaces:

jasvir-singh1021
/

Easy-data-parser

Sleeping

App Files Community

jasvir-singh1021 committed on Jul 27, 2025

Commit

c6c3565

verified ·

1 Parent(s): b9476b7

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -20

app.py CHANGED Viewed

@@ -1,49 +1,95 @@
 import streamlit as st
 import json
 st.set_page_config(page_title="Document Parser", layout="wide")
 if "conversation" not in st.session_state:
     st.session_state.conversation = []
 with st.sidebar:
     st.title("⚙️ Settings")
     api_key = st.text_input("🔑 OpenAI API Key", type="password")
-    temperature = st.slider("🔥 Model Temperature", 0.0, 1.0, 0.0, 0.1)
 st.title("📄 Document Parser")
-st.markdown("Upload documents and chat with a GPT-4 powered assistant.")
 uploaded_files = st.file_uploader(
-    "📤 Upload Documents (PDF, DOCX, TXT, etc.)",
-    type=["pdf", "docx", "doc", "txt", "rtf", "html"],
     accept_multiple_files=True
 )
-if uploaded_files:
-    st.success(f"{len(uploaded_files)} document(s) uploaded.")
-else:
-    st.info("Please upload at least one document to continue.")
-question = st.text_input("💬 Ask a question about your documents:")
-if st.button("🚀 Ask") and question and uploaded_files and api_key:
-    with st.spinner("Processing..."):
-        # Mock answer logic here — replace with your OpenAI API call if needed
-        mock_answer = f"🧠 Based on the uploaded documents, here's a mock answer to: '{question}'"
-        st.session_state.conversation.append({"role": "user", "content": question})
-        st.session_state.conversation.append({"role": "assistant", "content": mock_answer})
 if st.session_state.conversation:
     st.markdown("## 🧾 Conversation")
     for msg in st.session_state.conversation:
-        if msg["role"] == "user":
-            st.markdown(f"**You:** {msg['content']}")
-        else:
-            st.markdown(f"**Assistant:** {msg['content']}")
     st.markdown("---")
     col1, col2 = st.columns(2)
     with col1:

 import streamlit as st
+import openai
+import os
 import json
+from io import StringIO
+from PyPDF2 import PdfReader
+from docx import Document
+import html2text
+# Optional: Prevent config issues on HF Spaces by pointing Streamlit's
+# config directory at /tmp/.streamlit.
+# NOTE(review): this assignment runs after `import streamlit`; confirm that
+# Streamlit reads STREAMLIT_CONFIG_DIR late enough for it to take effect.
+os.environ["STREAMLIT_CONFIG_DIR"] = "/tmp/.streamlit"
+# Configure Streamlit page (browser tab title and wide layout)
 st.set_page_config(page_title="Document Parser", layout="wide")
+# Session state to hold chat history: the list is created only when the
+# "conversation" key is not present yet, so existing messages are kept.
 if "conversation" not in st.session_state:
     st.session_state.conversation = []
+# Sidebar settings: the API key and sampling temperature used for the
+# OpenAI request.
 with st.sidebar:
     st.title("⚙️ Settings")
+    # type="password" masks the key while it is typed.
     api_key = st.text_input("🔑 OpenAI API Key", type="password")
+    # Slider arguments: min 0.0, max 1.0, default 0.3, step 0.1.
+    temperature = st.slider("🔥 Temperature", 0.0, 1.0, 0.3, 0.1)
+# Main UI
 st.title("📄 Document Parser")
+st.markdown("Upload documents and ask questions using GPT.")
+# File uploader: restricted to the listed extensions; with
+# accept_multiple_files=True the result is a collection of uploads.
 uploaded_files = st.file_uploader(
+    "📤 Upload Documents (PDF, DOCX, TXT, HTML)",
+    type=["pdf", "docx", "txt", "html"],
     accept_multiple_files=True
 )
+def extract_text(file):
+    """Return the plain text of one uploaded document.
+
+    The parser is chosen from the text after the last "." in ``file.name``
+    (case-insensitive): ``pdf`` is read with PyPDF2, ``docx`` with
+    python-docx, ``txt`` is decoded directly and ``html`` is converted with
+    html2text. Any other extension yields an empty string.
+
+    Args:
+        file: Seekable binary file-like object with a ``name`` attribute,
+            such as the objects returned by ``st.file_uploader``.
+
+    Returns:
+        The extracted text, or ``""`` for an unsupported extension.
+    """
+    ext = file.name.lower().rpartition(".")[2]
+    # Rewind first: an earlier read of the same upload object leaves the
+    # stream at its end, which would otherwise produce empty text.
+    file.seek(0)
+    if ext == "pdf":
+        reader = PdfReader(file)
+        # Extract each page once; pages without any text are skipped.
+        page_texts = (page.extract_text() for page in reader.pages)
+        return "\n".join(text for text in page_texts if text)
+    if ext == "docx":
+        doc = Document(file)
+        return "\n".join(para.text for para in doc.paragraphs)
+    if ext in ("txt", "html"):
+        # "utf-8-sig" drops a leading BOM, and errors="replace" keeps a file
+        # in another encoding from raising UnicodeDecodeError.
+        raw_text = file.read().decode("utf-8-sig", errors="replace")
+        return html2text.html2text(raw_text) if ext == "html" else raw_text
+    return ""
+# Input field
+question = st.text_input("💬 Ask a question about the uploaded documents:")
+# Upper bound on how many characters of the combined document text are put
+# into the prompt; anything beyond it is not sent to the model.
+MAX_DOCUMENT_CHARS = 6000
+# When "Ask" button is clicked
+if st.button("🚀 Ask"):
+    # Say which input is missing instead of silently ignoring the click.
+    if not api_key:
+        st.warning("⚠️ Please enter your OpenAI API key in the sidebar.")
+    elif not uploaded_files:
+        st.warning("⚠️ Please upload at least one document.")
+    elif not question.strip():
+        st.warning("⚠️ Please enter a question.")
+    else:
+        with st.spinner("🧠 Thinking..."):
+            # Extract file by file so that one unreadable upload is reported
+            # without aborting the whole request.
+            extracted = []
+            for file in uploaded_files:
+                try:
+                    extracted.append(extract_text(file))
+                except Exception as e:
+                    st.warning(f"⚠️ Could not read {file.name}: {e}")
+            combined_text = "".join(f"{text}\n" for text in extracted)
+            if not combined_text.strip():
+                st.warning("⚠️ Could not extract text from uploaded files.")
+            else:
+                if len(combined_text) > MAX_DOCUMENT_CHARS:
+                    st.info(
+                        f"ℹ️ Only the first {MAX_DOCUMENT_CHARS} characters "
+                        "of the uploaded documents are sent to the model."
+                    )
+                try:
+                    # NOTE(review): openai.ChatCompletion is the pre-1.0
+                    # interface of the openai package; confirm the pinned
+                    # version before upgrading the dependency.
+                    openai.api_key = api_key
+                    response = openai.ChatCompletion.create(
+                        model="gpt-4",
+                        messages=[
+                            {"role": "system", "content": "You are a helpful assistant that answers questions based on uploaded documents."},
+                            {"role": "user", "content": f"DOCUMENT:\n{combined_text[:MAX_DOCUMENT_CHARS]}\n\nQUESTION:\n{question}"}
+                        ],
+                        temperature=temperature,
+                    )
+                    answer = response["choices"][0]["message"]["content"]
+                except Exception as e:
+                    st.error(f"❌ Error from OpenAI: {e}")
+                else:
+                    # Update conversation history only after a successful reply
+                    st.session_state.conversation.append({"role": "user", "content": question})
+                    st.session_state.conversation.append({"role": "assistant", "content": answer})
+# Display conversation
 if st.session_state.conversation:
     st.markdown("## 🧾 Conversation")
     for msg in st.session_state.conversation:
+        st.markdown(f"**{'You' if msg['role'] == 'user' else 'Assistant'}:** {msg['content']}")
     st.markdown("---")
     col1, col2 = st.columns(2)
     with col1: