# Source: Hugging Face Space commit 714614e ("Update app.py") by jasvir-singh1021 (verified).
import streamlit as st
import openai
import os
import json
from io import StringIO
from PyPDF2 import PdfReader
from docx import Document
import html2text
# Optional: point Streamlit's config directory at /tmp to prevent config
# issues on HF Spaces.
os.environ["STREAMLIT_CONFIG_DIR"] = "/tmp/.streamlit"

# Page-level configuration: title and wide layout.
st.set_page_config(page_title="Document Parser", layout="wide")

# The chat history is kept in session state; create it on first use.
if "conversation" not in st.session_state:
    st.session_state["conversation"] = []

# Sidebar: API credentials and sampling temperature.
st.sidebar.title("Settings")
api_key = st.sidebar.text_input("OpenAI API Key", type="password")
temperature = st.sidebar.slider(
    "Temperature",
    min_value=0.0,
    max_value=1.0,
    value=0.3,
    step=0.1,
)

# Main page heading and short usage hint.
st.title("Document Parser")
st.markdown("Upload documents and ask questions using GPT.")

# Document upload widget; several files may be selected at once.
uploaded_files = st.file_uploader(
    "Upload Documents (PDF, DOCX, TXT, HTML)",
    accept_multiple_files=True,
    type=["pdf", "docx", "txt", "html"],
)
def extract_text(file):
    """Return the plain text contained in an uploaded document.

    The parser is chosen from the extension of ``file.name``
    (case-insensitive): ``pdf`` is read with ``PdfReader``, ``docx`` with
    ``Document``, ``txt`` is decoded as-is and ``html`` is decoded and then
    converted with ``html2text``.

    Args:
        file: Binary file-like object exposing ``name`` and ``read()``, such
            as the objects returned by ``st.file_uploader``.

    Returns:
        The extracted text, or ``""`` when the extension is not supported.
    """
    # Rewind when possible so a stream that was already consumed (for
    # example by an earlier call on the same object) is read from the start
    # instead of yielding an empty string.
    seek = getattr(file, "seek", None)
    if callable(seek):
        seek(0)

    ext = file.name.lower().rsplit(".", 1)[-1]

    if ext == "pdf":
        reader = PdfReader(file)
        # Extract each page exactly once; pages without text are skipped.
        page_texts = (page.extract_text() for page in reader.pages)
        return "\n".join(text for text in page_texts if text)

    if ext == "docx":
        return "\n".join(para.text for para in Document(file).paragraphs)

    if ext in ("txt", "html"):
        # "utf-8-sig" drops a leading BOM if present; errors="replace" keeps
        # files in other encodings readable instead of raising
        # UnicodeDecodeError.
        decoded = file.read().decode("utf-8-sig", errors="replace")
        return decoded if ext == "txt" else html2text.html2text(decoded)

    return ""
# Input field
question = st.text_input("Ask a question about the uploaded documents:")

# Only this many leading characters of the combined document text are put in
# the prompt (presumably to stay within the model's context window).
MAX_DOCUMENT_CHARS = 6000


def _ask_openai(key, sampling_temperature, document_text, user_question):
    """Ask the chat model a question about ``document_text`` and return its answer.

    Supports both the client-based interface of openai>=1.0 and the legacy
    module-level ``openai.ChatCompletion`` interface, which was removed in 1.0.

    Args:
        key: OpenAI API key.
        sampling_temperature: Sampling temperature passed to the model.
        document_text: Document excerpt placed in the prompt.
        user_question: The user's question.

    Returns:
        The assistant's reply as a string.
    """
    request = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant that answers questions based on uploaded documents."},
            {"role": "user", "content": f"DOCUMENT:\n{document_text}\n\nQUESTION:\n{user_question}"},
        ],
        "temperature": sampling_temperature,
    }
    if hasattr(openai, "OpenAI"):
        # openai>=1.0: per-client API key, attribute-style response objects.
        client = openai.OpenAI(api_key=key)
        response = client.chat.completions.create(**request)
        return response.choices[0].message.content or ""
    # Legacy openai<1.0 interface.
    openai.api_key = key
    response = openai.ChatCompletion.create(**request)
    return response["choices"][0]["message"]["content"]


# When "Ask" button is clicked
if st.button("Ask"):
    # Tell the user what is missing instead of silently ignoring the click.
    missing = [
        label
        for label, value in (
            ("an OpenAI API key", api_key),
            ("at least one uploaded document", uploaded_files),
            ("a question", question),
        )
        if not value
    ]
    if missing:
        st.warning("Please provide " + ", ".join(missing) + ".")
    else:
        with st.spinner("Processing..."):
            # Extract text from every uploaded file; one unreadable file
            # should not abort the whole request.
            extracted = []
            for file in uploaded_files:
                try:
                    extracted.append(extract_text(file))
                except Exception as e:
                    st.warning(f"Could not read {file.name}: {e}")
            combined_text = "".join(text + "\n" for text in extracted)

            if not combined_text.strip():
                st.warning("Could not extract text from uploaded files.")
            else:
                if len(combined_text) > MAX_DOCUMENT_CHARS:
                    st.info(
                        f"Only the first {MAX_DOCUMENT_CHARS} characters of "
                        "the uploaded documents are sent to the model."
                    )
                try:
                    answer = _ask_openai(
                        api_key,
                        temperature,
                        combined_text[:MAX_DOCUMENT_CHARS],
                        question,
                    )
                except Exception as e:
                    # UI boundary: surface any API/network failure to the user.
                    st.error(f"Error from OpenAI: {e}")
                else:
                    # Update conversation history
                    st.session_state.conversation.append({"role": "user", "content": question})
                    st.session_state.conversation.append({"role": "assistant", "content": answer})
# Display conversation: render the history, then offer clear/download controls.
if st.session_state.conversation:
    st.markdown("## Conversation")
    for msg in st.session_state.conversation:
        speaker = "You" if msg["role"] == "user" else "Assistant"
        st.markdown(f"**{speaker}:** {msg['content']}")
    st.markdown("---")

    col1, col2 = st.columns(2)
    with col1:
        if st.button("Clear Conversation"):
            st.session_state.conversation = []
            # The history above was already rendered in this run, so rerun
            # the script to redraw it empty. Prefer st.rerun(); fall back to
            # the older experimental name on Streamlit versions without it.
            rerun = getattr(st, "rerun", None) or st.experimental_rerun
            rerun()
    with col2:
        # Named download_format to avoid shadowing the builtin format().
        download_format = st.selectbox("Download Format", ["TXT", "JSON"])
        if download_format == "TXT":
            content = "\n\n".join(
                f"{msg['role'].capitalize()}:\n{msg['content']}"
                for msg in st.session_state.conversation
            )
            mime = "text/plain"
            filename = "conversation.txt"
        else:
            # ensure_ascii=False keeps non-ASCII text readable in the export
            # instead of writing \uXXXX escapes.
            content = json.dumps(
                st.session_state.conversation, indent=2, ensure_ascii=False
            )
            mime = "application/json"
            filename = "conversation.json"
        st.download_button("Download", content, file_name=filename, mime=mime)