jasvir-singh1021 committed on
Commit
c6c3565
·
verified ·
1 Parent(s): b9476b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -20
app.py CHANGED
@@ -1,49 +1,95 @@
1
  import streamlit as st
 
 
2
  import json
 
 
 
 
3
 
 
 
 
 
4
  st.set_page_config(page_title="Document Parser", layout="wide")
5
 
 
6
  if "conversation" not in st.session_state:
7
  st.session_state.conversation = []
8
 
 
9
  with st.sidebar:
10
  st.title("βš™οΈ Settings")
11
  api_key = st.text_input("πŸ”‘ OpenAI API Key", type="password")
12
- temperature = st.slider("πŸ”₯ Model Temperature", 0.0, 1.0, 0.0, 0.1)
13
 
 
14
  st.title("πŸ“„ Document Parser")
15
- st.markdown("Upload documents and chat with a GPT-4 powered assistant.")
16
 
 
17
  uploaded_files = st.file_uploader(
18
- "πŸ“€ Upload Documents (PDF, DOCX, TXT, etc.)",
19
- type=["pdf", "docx", "doc", "txt", "rtf", "html"],
20
  accept_multiple_files=True
21
  )
22
 
23
- if uploaded_files:
24
- st.success(f"{len(uploaded_files)} document(s) uploaded.")
25
- else:
26
- st.info("Please upload at least one document to continue.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- question = st.text_input("πŸ’¬ Ask a question about your documents:")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- if st.button("πŸš€ Ask") and question and uploaded_files and api_key:
31
- with st.spinner("Processing..."):
32
- # Mock answer logic here β€” replace with your OpenAI API call if needed
33
- mock_answer = f"🧠 Based on the uploaded documents, here's a mock answer to: '{question}'"
34
- st.session_state.conversation.append({"role": "user", "content": question})
35
- st.session_state.conversation.append({"role": "assistant", "content": mock_answer})
36
 
 
37
  if st.session_state.conversation:
38
  st.markdown("## 🧾 Conversation")
39
  for msg in st.session_state.conversation:
40
- if msg["role"] == "user":
41
- st.markdown(f"**You:** {msg['content']}")
42
- else:
43
- st.markdown(f"**Assistant:** {msg['content']}")
44
 
45
  st.markdown("---")
46
-
47
  col1, col2 = st.columns(2)
48
 
49
  with col1:
 
1
  import streamlit as st
2
+ import openai
3
+ import os
4
  import json
5
+ from io import StringIO
6
+ from PyPDF2 import PdfReader
7
+ from docx import Document
8
+ import html2text
9
 
10
+ # Optional: Prevent config issues on HF Spaces
11
+ os.environ["STREAMLIT_CONFIG_DIR"] = "/tmp/.streamlit"
12
+
13
+ # Configure Streamlit page
14
  st.set_page_config(page_title="Document Parser", layout="wide")
15
 
16
+ # Session state to hold chat history
17
  if "conversation" not in st.session_state:
18
  st.session_state.conversation = []
19
 
20
+ # Sidebar settings
21
  with st.sidebar:
22
  st.title("βš™οΈ Settings")
23
  api_key = st.text_input("πŸ”‘ OpenAI API Key", type="password")
24
+ temperature = st.slider("πŸ”₯ Temperature", 0.0, 1.0, 0.3, 0.1)
25
 
26
+ # Main UI
27
  st.title("πŸ“„ Document Parser")
28
+ st.markdown("Upload documents and ask questions using GPT.")
29
 
30
+ # File uploader
31
  uploaded_files = st.file_uploader(
32
+ "πŸ“€ Upload Documents (PDF, DOCX, TXT, HTML)",
33
+ type=["pdf", "docx", "txt", "html"],
34
  accept_multiple_files=True
35
  )
36
 
37
def extract_text(file) -> str:
    """Return the plain text of an uploaded document.

    The format is chosen from the extension of ``file.name``
    (case-insensitive): ``pdf``, ``docx``, ``txt`` and ``html`` are
    supported; any other extension yields an empty string.

    Args:
        file: A binary file-like object with a ``name`` attribute and
            ``read``/``seek`` methods (presumably the object returned by
            ``st.file_uploader`` -- verify against the caller).

    Returns:
        The extracted text, or ``""`` when the extension is not recognised.
    """
    ext = file.name.lower().split(".")[-1]

    # Rewind first: the same upload object may already have been consumed by
    # an earlier call, in which case read() would silently return b"".
    file.seek(0)

    if ext == "pdf":
        reader = PdfReader(file)
        # Extract each page exactly once; pages with no text are skipped.
        page_texts = (page.extract_text() for page in reader.pages)
        return "\n".join(text for text in page_texts if text)
    if ext == "docx":
        doc = Document(file)
        return "\n".join(para.text for para in doc.paragraphs)
    if ext == "txt":
        # errors="replace" keeps a badly encoded file from raising
        # UnicodeDecodeError; undecodable bytes become U+FFFD instead.
        return file.read().decode("utf-8", errors="replace")
    if ext == "html":
        return html2text.html2text(file.read().decode("utf-8", errors="replace"))
    return ""
51
+
52
+ # Input field
53
+ question = st.text_input("πŸ’¬ Ask a question about the uploaded documents:")
54
+
55
+ # When "Ask" button is clicked
56
+ if st.button("πŸš€ Ask") and uploaded_files and question and api_key:
57
+ with st.spinner("🧠 Thinking..."):
58
+
59
+ # Extract and combine text from all uploaded files
60
+ combined_text = ""
61
+ for file in uploaded_files:
62
+ combined_text += extract_text(file) + "\n"
63
 
64
+ if not combined_text.strip():
65
+ st.warning("⚠️ Could not extract text from uploaded files.")
66
+ else:
67
+ try:
68
+ openai.api_key = api_key
69
+ response = openai.ChatCompletion.create(
70
+ model="gpt-4",
71
+ messages=[
72
+ {"role": "system", "content": "You are a helpful assistant that answers questions based on uploaded documents."},
73
+ {"role": "user", "content": f"DOCUMENT:\n{combined_text[:6000]}\n\nQUESTION:\n{question}"}
74
+ ],
75
+ temperature=temperature,
76
+ )
77
+ answer = response["choices"][0]["message"]["content"]
78
+
79
+ # Update conversation history
80
+ st.session_state.conversation.append({"role": "user", "content": question})
81
+ st.session_state.conversation.append({"role": "assistant", "content": answer})
82
 
83
+ except Exception as e:
84
+ st.error(f"❌ Error from OpenAI: {e}")
 
 
 
 
85
 
86
+ # Display conversation
87
  if st.session_state.conversation:
88
  st.markdown("## 🧾 Conversation")
89
  for msg in st.session_state.conversation:
90
+ st.markdown(f"**{'You' if msg['role'] == 'user' else 'Assistant'}:** {msg['content']}")
 
 
 
91
 
92
  st.markdown("---")
 
93
  col1, col2 = st.columns(2)
94
 
95
  with col1: