jasvir-singh1021 committed on
Commit
aa87ef2
·
verified ·
1 Parent(s): 3d4310e

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +79 -79
src/streamlit_app.py CHANGED
@@ -1,115 +1,115 @@
1
  import streamlit as st
 
2
  import json
3
- from datetime import datetime
 
 
4
 
5
- # Page config
6
- st.set_page_config(page_title="Document Parser", layout="wide", page_icon="πŸ“„")
7
 
8
- # Initialize state
9
  if "conversation" not in st.session_state:
10
  st.session_state.conversation = []
11
- if "last_question" not in st.session_state:
12
- st.session_state.last_question = None
13
 
14
- # Sidebar settings
15
  with st.sidebar:
16
  st.title("βš™οΈ Settings")
17
- api_key = st.text_input("πŸ”‘ OpenAI API Key", type="password", help="Paste your OpenAI API key")
18
- temperature = st.slider("πŸ”₯ Model Temperature", 0.0, 1.0, 0.3, 0.05, help="Higher values make responses more creative.")
19
- st.markdown("---")
20
- st.caption("Built with ❀️ using Streamlit")
21
 
22
- # Title & instructions
23
  st.title("πŸ“„ Document Parser")
24
- st.markdown("Upload your documents and ask questions powered by GPT-4 and LlamaIndex (or mock engine).")
25
 
26
- # File upload
27
  uploaded_files = st.file_uploader(
28
- "πŸ“€ Upload Documents (PDF, DOCX, TXT, etc.)",
29
- type=["pdf", "docx", "doc", "txt", "rtf", "html"],
30
  accept_multiple_files=True
31
  )
32
 
33
- # File display
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  if uploaded_files:
35
- st.success(f"{len(uploaded_files)} document(s) uploaded.")
36
- with st.expander("πŸ“š Uploaded Files Overview"):
37
- for file in uploaded_files:
38
- st.write(f"β€’ `{file.name}` ({round(file.size / 1024, 2)} KB)")
39
- st.markdown("βœ… Ready to ask questions.")
40
  else:
41
- st.warning("⚠️ Please upload at least one document.")
42
 
43
- # Suggestive prompts
44
- if uploaded_files:
45
- st.markdown("#### πŸ’‘ Suggested Questions")
46
- suggestions = [
47
- "What is the main topic of the documents?",
48
- "Summarize the contents.",
49
- "Are there any deadlines or dates mentioned?",
50
- "What are the key takeaways?"
51
- ]
52
- for i, s in enumerate(suggestions):
53
- if st.button(f"πŸ’¬ {s}", key=f"suggestion_{i}"):
54
- st.session_state.last_question = s
55
-
56
- # Text input
57
- question = st.text_input("πŸ”Ž Ask a question about your documents:", value=st.session_state.last_question or "")
58
-
59
- # Ask button
60
- ask_col, retry_col = st.columns([4, 1])
61
- with ask_col:
62
- send = st.button("πŸš€ Ask")
63
- with retry_col:
64
- retry = st.button("πŸ” Retry")
65
-
66
- if (send or retry) and question and api_key and uploaded_files:
67
- st.session_state.last_question = question
68
- with st.spinner("Analyzing your documents..."):
69
- # TODO: Replace this with actual LLM logic
70
- mock_answer = f"πŸ€– Here's a simulated response to your question: **'{question}'**"
71
-
72
- # Tag this session
73
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
74
-
75
- # Store in conversation
76
- st.session_state.conversation.append({
77
- "role": "user",
78
- "content": question,
79
- "timestamp": timestamp
80
- })
81
- st.session_state.conversation.append({
82
- "role": "assistant",
83
- "content": mock_answer,
84
- "timestamp": timestamp
85
- })
86
-
87
- elif send or retry:
88
- st.error("Please make sure you've uploaded documents and provided an API key.")
89
-
90
- # Conversation history
91
  if st.session_state.conversation:
92
- st.markdown("## 🧾 Conversation History")
93
  for msg in st.session_state.conversation:
94
- author = "πŸ§‘ You" if msg["role"] == "user" else "πŸ€– Assistant"
95
- st.markdown(f"**{author}** *(at {msg['timestamp']})*:\n\n{msg['content']}", unsafe_allow_html=True)
 
 
96
 
97
- # Actions
98
  st.markdown("---")
 
99
  col1, col2 = st.columns(2)
100
 
101
  with col1:
102
  if st.button("πŸ—‘οΈ Clear Conversation"):
103
  st.session_state.conversation = []
104
- st.session_state.last_question = None
105
  st.experimental_rerun()
106
 
107
  with col2:
108
  format = st.selectbox("Download Format", ["TXT", "JSON"])
109
  if format == "TXT":
110
  content = "\n\n".join(
111
- f"{msg['role'].capitalize()} ({msg['timestamp']}):\n{msg['content']}"
112
- for msg in st.session_state.conversation
113
  )
114
  mime = "text/plain"
115
  filename = "conversation.txt"
@@ -118,4 +118,4 @@ if st.session_state.conversation:
118
  mime = "application/json"
119
  filename = "conversation.json"
120
 
121
- st.download_button("πŸ“₯ Download", content, file_name=filename, mime=mime)
 
1
  import streamlit as st
2
+ import openai
3
  import json
4
+ from PyPDF2 import PdfReader
5
+ from docx import Document
6
+ import html2text
7
 
8
# Page-level configuration: browser-tab title and wide layout.
st.set_page_config(page_title="Document Parser", layout="wide")

# Seed the chat history only when the key is absent, so an existing
# conversation stored in session_state is never overwritten here.
if "conversation" not in st.session_state:
    st.session_state.conversation = []

# Sidebar: API credentials and sampling temperature (0.0-1.0, default 0.3,
# step 0.1).
with st.sidebar:
    st.title("⚙️ Settings")
    api_key = st.text_input("🔑 OpenAI API Key", type="password")
    temperature = st.slider("🔥 Model Temperature", 0.0, 1.0, 0.3, 0.1)

# Main heading and a one-line usage hint.
st.title("📄 Document Parser")
st.markdown("Upload documents and ask questions about their content using GPT-4.")

# Multi-file uploader limited to the extensions named in its label.
uploaded_files = st.file_uploader(
    "📤 Upload Documents (PDF, DOCX, TXT, HTML)",
    type=["pdf", "docx", "txt", "html"],
    accept_multiple_files=True,
)
31
 
32
+ # Extract text from uploaded files
33
def extract_text(file) -> str:
    """Return the plain-text content of one uploaded file.

    Dispatches on the MIME type in ``file.type``: PDFs go through
    ``PdfReader``, Word documents through ``Document``, HTML through
    ``html2text``; every other type is decoded as UTF-8 text.

    Args:
        file: Uploaded file object exposing ``type``, ``name`` and ``read()``
            (presumably a Streamlit ``UploadedFile`` - confirm against caller).

    Returns:
        The extracted text, or a ``[Error reading <name>: <reason>]``
        placeholder when extraction fails for any reason.
    """
    # NOTE(review): "application/msword" (legacy .doc) is routed to Document as
    # well; confirm that python-docx can actually open such files.
    word_types = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/msword",
    )
    try:
        if file.type == "application/pdf":
            # Extract each page exactly once; pages yielding no text are skipped.
            page_texts = (page.extract_text() for page in PdfReader(file).pages)
            return "\n".join(text for text in page_texts if text)
        if file.type in word_types:
            return "\n".join(p.text for p in Document(file).paragraphs)
        # errors="replace" keeps the readable part of a file that contains a
        # few invalid UTF-8 bytes instead of discarding the whole document.
        decoded = file.read().decode("utf-8", errors="replace")
        if file.type == "text/html":
            return html2text.html2text(decoded)
        return decoded
    except Exception as e:
        # Deliberate best-effort: one unreadable upload must not abort the
        # whole run, so the failure is reported inline in the returned text.
        return f"[Error reading {file.name}: {e}]"
48
+
49
# Combine the extracted text of every upload, each section preceded by a
# "--- <file name> ---" separator line.
all_text = ""
if uploaded_files:
    all_text = "".join(
        f"\n--- {file.name} ---\n{extract_text(file)}" for file in uploaded_files
    )
    st.success(f"{len(uploaded_files)} document(s) processed.")
else:
    st.info("Please upload at least one document to continue.")

# Question input
question = st.text_input("💬 Ask a question about your documents:")

# Capture the click separately so a click with missing inputs can be reported
# instead of being silently ignored.
ask_clicked = st.button("🚀 Ask")

if ask_clicked and not (question and uploaded_files and api_key):
    st.error(
        "Please enter a question, upload at least one document, "
        "and provide an API key."
    )
elif ask_clicked:
    with st.spinner("Processing with GPT-4..."):
        try:
            openai.api_key = api_key

            # The full document text is embedded in the user message.
            prompt = (
                "You are a document assistant. Based on the following content, answer the question clearly.\n\n"
                f"{all_text}\n\nQuestion: {question}"
            )

            # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface;
            # confirm the pinned openai version, since openai>=1.0 replaces it
            # with client.chat.completions.create.
            response = openai.ChatCompletion.create(
                model="gpt-4",
                temperature=temperature,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that answers questions based on document content."},
                    {"role": "user", "content": prompt},
                ],
            )

            answer = response.choices[0].message.content.strip()

            # Record both turns only after the request succeeded.
            st.session_state.conversation.append({"role": "user", "content": question})
            st.session_state.conversation.append({"role": "assistant", "content": answer})

        except Exception as e:
            # Top-level UI boundary: surface any API or network failure in the page.
            st.error(f"Error: {e}")
+
90
+ # Show conversation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  if st.session_state.conversation:
92
+ st.markdown("## 🧾 Conversation")
93
  for msg in st.session_state.conversation:
94
+ if msg["role"] == "user":
95
+ st.markdown(f"**You:** {msg['content']}")
96
+ else:
97
+ st.markdown(f"**Assistant:** {msg['content']}")
98
 
 
99
  st.markdown("---")
100
+
101
  col1, col2 = st.columns(2)
102
 
103
  with col1:
104
  if st.button("πŸ—‘οΈ Clear Conversation"):
105
  st.session_state.conversation = []
 
106
  st.experimental_rerun()
107
 
108
  with col2:
109
  format = st.selectbox("Download Format", ["TXT", "JSON"])
110
  if format == "TXT":
111
  content = "\n\n".join(
112
+ f"{msg['role'].capitalize()}:\n{msg['content']}" for msg in st.session_state.conversation
 
113
  )
114
  mime = "text/plain"
115
  filename = "conversation.txt"
 
118
  mime = "application/json"
119
  filename = "conversation.json"
120
 
121
# st.download_button names this keyword `file_name`; `filename` is not an
# accepted parameter and makes the call fail with TypeError.
st.download_button("📥 Download", content, file_name=filename, mime=mime)