TahaRasouli committed on
Commit
35ecede
·
verified ·
1 Parent(s): 1abb1bd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from typing import List
4
+ from unified_document_processor import UnifiedDocumentProcessor, CustomEmbeddingFunction
5
+
6
class StreamlitDocProcessor:
    """Streamlit UI for uploading documents and asking questions about them.

    Two entries are kept in ``st.session_state`` and created only when they
    are not already present:

    * ``processor`` -- a ``UnifiedDocumentProcessor`` built with the
      ``GROQ_API_KEY`` secret.
    * ``processed_files`` -- a set of upload names that were processed
      successfully in this session.
    """

    def __init__(self):
        """Populate ``st.session_state`` with the processor and file set."""
        if 'processor' not in st.session_state:
            groq_api_key = st.secrets["GROQ_API_KEY"]
            st.session_state.processor = UnifiedDocumentProcessor(groq_api_key)

        if 'processed_files' not in st.session_state:
            st.session_state.processed_files = set()

    def run(self):
        """Draw the title and sidebar, then render the selected page."""
        st.title("Document Processing and Q&A System")

        # Sidebar select box acts as the page navigation.
        page = st.sidebar.selectbox(
            "Choose a page",
            ["Upload & Process", "Question & Answer"]
        )

        if page == "Upload & Process":
            self.upload_and_process_page()
        else:
            self.qa_page()

    def upload_and_process_page(self):
        """Render the upload page and process every not-yet-processed upload.

        Uploads whose name is already in ``st.session_state.processed_files``
        are reported as already processed and skipped. After the loop the
        sorted list of processed file names is displayed.
        """
        st.header("Upload and Process Documents")

        uploaded_files = st.file_uploader(
            "Upload PDF or XML files",
            type=['pdf', 'xml'],
            accept_multiple_files=True
        )

        if uploaded_files:
            for uploaded_file in uploaded_files:
                if uploaded_file.name not in st.session_state.processed_files:
                    self._process_upload(uploaded_file)
                else:
                    st.info(f"{uploaded_file.name} has already been processed")

        # Display processed files
        if st.session_state.processed_files:
            st.subheader("Processed Files")
            for file in sorted(st.session_state.processed_files):
                st.text(f"✓ {file}")

    def _process_upload(self, uploaded_file):
        """Write one upload to a temporary file, process it, and clean up.

        The upload's bytes are written to ``temp_<name>`` in the current
        working directory and that path is handed to the processor's
        ``process_file``. On success the upload name is added to
        ``st.session_state.processed_files``; otherwise the reported error
        is shown. Exceptions raised while writing or processing propagate
        to the caller, but the temporary file is removed in every case.
        """
        # basename() strips any directory components from the client-supplied
        # name so the temporary file always lands in the working directory.
        temp_path = f"temp_{os.path.basename(uploaded_file.name)}"
        try:
            with open(temp_path, "wb") as f:
                f.write(uploaded_file.getbuffer())

            with st.spinner(f'Processing {uploaded_file.name}...'):
                result = st.session_state.processor.process_file(temp_path)

            if result['success']:
                st.success(f"Successfully processed {uploaded_file.name}")
                st.session_state.processed_files.add(uploaded_file.name)
            else:
                st.error(f"Failed to process {uploaded_file.name}: {result['error']}")
        finally:
            # Always remove the temporary file, even when processing raised.
            # A missing file (e.g. open() itself failed) must not mask the
            # original exception.
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass

    def qa_page(self):
        """Render the Q&A page: pick files, enter a question, show the answer.

        Returns early with a warning when the processor reports no files or
        when the user deselects every file. The question is sent to
        ``ask_question_selective`` only when the button is pressed and the
        text input is non-empty.
        """
        st.header("Ask Questions About Your Documents")

        # The processor returns a mapping with 'pdf' and 'xml' lists.
        available_files = st.session_state.processor.get_available_files()
        all_files = available_files['pdf'] + available_files['xml']

        if not all_files:
            st.warning("No processed files available. Please upload and process some files first.")
            return

        # All files are preselected by default.
        selected_files = st.multiselect(
            "Select files to search through",
            all_files,
            default=all_files
        )

        if not selected_files:
            st.warning("Please select at least one file to search through.")
            return

        question = st.text_input("Enter your question:")

        if st.button("Ask Question") and question:
            with st.spinner("Searching for answer..."):
                answer = st.session_state.processor.ask_question_selective(
                    question,
                    selected_files
                )
                st.write("Answer:", answer)
101
+
102
def main():
    """Instantiate ``StreamlitDocProcessor`` and invoke its ``run`` method."""
    StreamlitDocProcessor().run()


# Only start the app when this file is executed as a script.
if __name__ == "__main__":
    main()