cryogenic22 committed on
Commit
f8f680c
·
verified ·
1 Parent(s): d5ad9e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +210 -57
app.py CHANGED
@@ -1,72 +1,225 @@
1
-
2
  # app.py
3
  import streamlit as st
4
  import asyncio
 
 
 
 
 
5
  from core.document_processor import DocumentProcessor
6
- from modules.qa_module import QAModule
7
- from modules.summarizer_module import SummarizerModule
8
- from modules.extractor_module import ExtractorModule
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
class GenAIDemo:
    """Facade over the document processor and the QA, summary and extraction modules."""

    def __init__(self):
        self.qa_module = QAModule()
        self.summarizer_module = SummarizerModule()
        self.extractor_module = ExtractorModule()
        self.doc_processor = DocumentProcessor()

    async def process_document(self, file):
        """Save an uploaded file under ``temp/`` and run it through the processor.

        Args:
            file: Upload object exposing ``name`` and ``getbuffer()``.

        Returns:
            Whatever ``self.doc_processor.process_document`` returns for the
            saved copy.
        """
        # Function-scope import: the file's import block does not provide ``os``.
        import os

        # Keep only the last path component so a crafted upload name such as
        # "../x" cannot write outside the temp directory.
        safe_name = os.path.basename(file.name.replace("\\", "/"))
        # The directory is not guaranteed to exist on a fresh checkout.
        os.makedirs("temp", exist_ok=True)
        temp_path = f"temp/{safe_name}"

        try:
            with open(temp_path, "wb") as f:
                f.write(file.getbuffer())
            return self.doc_processor.process_document(temp_path)
        finally:
            # NOTE(review): assumes the processor reads the file eagerly, so the
            # temp copy can be removed as soon as it returns - confirm.
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass  # nothing was written, nothing to clean up

    # The return annotations use the builtin ``dict`` because ``typing.Dict``
    # is not imported in this version of the file.
    async def run_qa(self, query: str, context: str) -> dict:
        """Forward a question and its context to the QA module."""
        return await self.qa_module.process({"query": query, "context": context})

    async def run_summary(self, text: str) -> dict:
        """Forward *text* to the summarizer module."""
        return await self.summarizer_module.process({"text": text})

    async def run_extraction(self, text: str) -> dict:
        """Forward *text* to the extractor module."""
        return await self.extractor_module.process({"text": text})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
def main():
    """Streamlit entry point for the single-document demo.

    Shows a title and a file uploader. Once a file is present it is processed
    and three tabs are offered: question answering, summarization and entity
    extraction, each driven by the processed document's ``"content"``.
    """
    st.title("GenAI Document Processing Demo")
    demo = GenAIDemo()

    upload = st.file_uploader("Choose a document", type=['txt', 'pdf', 'docx'])
    if not upload:
        # Nothing uploaded yet: the rest of the page depends on a document.
        return

    processed = asyncio.run(demo.process_document(upload))
    st.write("Document processed successfully!")

    qa_tab, summary_tab, entity_tab = st.tabs(["Q&A", "Summarization", "Entity Extraction"])

    with qa_tab:
        st.header("Question & Answer")
        question = st.text_input("Ask a question about the document:")
        if question:
            reply = asyncio.run(demo.run_qa(question, processed["content"]))
            st.write("Answer:", reply["answer"])

    with summary_tab:
        st.header("Document Summarization")
        if st.button("Generate Summary"):
            digest = asyncio.run(demo.run_summary(processed["content"]))
            st.write("Summary:", digest["summary"])

    with entity_tab:
        st.header("Entity Extraction")
        if st.button("Extract Entities"):
            extraction = asyncio.run(demo.run_extraction(processed["content"]))
            st.write("Entities found:")
            for ent in extraction["entities"]:
                st.write(f"- {ent['text']} ({ent['label']})")
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  if __name__ == "__main__":
72
- main()
 
 
1
  # app.py
2
  import streamlit as st
3
  import asyncio
4
+ from pathlib import Path
5
+ import tempfile
6
+ import time
7
+ from typing import Dict, List
8
+ import pandas as pd
9
  from core.document_processor import DocumentProcessor
10
+ from core.embeddings import DocumentEmbedder
11
+ from core.vector_store import FAISSVectorStore
12
+ from modules.qa_module import EnhancedQAModule
13
+
# Page configuration, kept as data so the settings can be read at a glance.
_PAGE_SETTINGS = {
    "page_title": "SYNAPTYX - AI Accelerator",
    "page_icon": "🧠",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)

# Custom CSS injected as raw HTML: page background, green buttons, and the
# `custom-title` / `metric-card` helper classes.
_CUSTOM_CSS = """
<style>
.main {
background-color: #f5f5f5;
}
.stButton>button {
background-color: #4CAF50;
color: white;
border-radius: 5px;
border: none;
padding: 10px 24px;
}
.stButton>button:hover {
background-color: #45a049;
}
.css-1d391kg {
padding: 2rem 1rem;
}
.stAlert {
background-color: rgba(255, 255, 255, 0.9);
}
.custom-title {
font-size: 2.5rem;
font-weight: bold;
color: #1E3D59;
text-align: center;
margin-bottom: 2rem;
}
.metric-card {
background-color: white;
padding: 1rem;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
59
 
# Seed the session with its defaults; keys that already exist are left alone
# so values survive script reruns within the same session.
for _state_key, _state_default in (
    ("processed_docs", 0),
    ("total_chunks", 0),
    ("demo", None),
    ("history", []),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
69
+
class SynaptyxDemo:
    """Ties together document processing, embedding, vector search and QA.

    Both public coroutines report their outcome as a dict with a ``"status"``
    key (``"success"`` or ``"error"``) instead of raising.
    """

    def __init__(self):
        self.embedder = DocumentEmbedder()
        self.vector_store = FAISSVectorStore()
        self.qa_module = EnhancedQAModule()
        self.doc_processor = DocumentProcessor()

    async def process_document(self, file) -> Dict:
        """Ingest one uploaded file into the vector store.

        The upload is copied to a temporary file (keeping its suffix), run
        through the document processor, embedded, and added to the store.

        Args:
            file: Upload object exposing ``name`` and ``getbuffer()``.

        Returns:
            On success ``{"status": "success", "chunks": <int>, "metadata": ...}``;
            on any failure ``{"status": "error", "error": <message>}``.
        """
        # Bound before the try block so the cleanup in ``finally`` never hits
        # an unbound name when temp-file creation itself fails.
        temp_path = None
        try:
            suffix = Path(file.name).suffix
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                # Record the path first: if the write fails the file still
                # exists and must be removed below.
                temp_path = tmp_file.name
                tmp_file.write(file.getbuffer())

            # Process document
            doc_content = self.doc_processor.process_document(temp_path)

            # Process for vector store
            chunks, embeddings, metadata = self.embedder.process_documents([{
                "content": doc_content["content"],
                "source": file.name,
            }])

            # Add to vector store
            self.vector_store.add_documents(chunks, embeddings, metadata)

            return {
                "status": "success",
                "chunks": len(chunks),
                "metadata": doc_content["metadata"],
            }

        except Exception as e:
            return {"status": "error", "error": str(e)}

        finally:
            if temp_path is not None:
                try:
                    Path(temp_path).unlink(missing_ok=True)
                except OSError:
                    # Best-effort cleanup: a failed delete must not replace
                    # the result dict being returned above.
                    pass

    async def query(self, question: str, k: int = 5) -> Dict:
        """Answer *question* from the ``k`` most similar stored chunks.

        Args:
            question: Natural-language question; must not be blank.
            k: Number of chunks requested from the vector store.

        Returns:
            On success a dict with ``"status"``, ``"answer"``, ``"confidence"``
            and ``"sources"``; otherwise ``{"status": "error", "error": <message>}``.
        """
        try:
            if not question or not question.strip():
                return {"status": "error", "error": "Question must not be empty."}

            # Get relevant documents
            relevant_docs = self.vector_store.similarity_search(
                question,
                self.embedder,
                k=k,
            )

            # Get answer
            answer = await self.qa_module.process(question, relevant_docs)

            return {
                "status": "success",
                "answer": answer["answer"],
                "confidence": answer["confidence"],
                "sources": answer["sources"],
            }
        except Exception as e:
            return {"status": "error", "error": str(e)}
128
 
def main():
    """Render the SYNAPTYX page: sidebar, document upload, Q&A and history.

    Streamlit re-executes this function on every widget interaction, so work
    with side effects (document ingestion, history appends) is guarded by
    markers kept in ``st.session_state`` and happens once per distinct input.
    """
    state = st.session_state
    # Seed every key this function relies on; existing values are kept.
    for key, default in (
        ("processed_docs", 0),
        ("total_chunks", 0),
        ("demo", None),
        ("history", []),
        ("processed_files", {}),
        ("last_query_key", None),
        ("last_query_result", None),
    ):
        if key not in state:
            state[key] = default

    # Initialize demo instance if not exists
    if state.demo is None:
        state.demo = SynaptyxDemo()

    k_value, refresh_analytics = _render_sidebar()

    # Main content
    st.markdown("<h1 class='custom-title'>🧠 SYNAPTYX - Document Analytics</h1>", unsafe_allow_html=True)

    # Document upload section
    # NOTE(review): the folder icon was reconstructed from mis-encoded bytes
    # whose last byte was lost - confirm it is the intended emoji.
    st.markdown("### 📁 Document Upload")
    uploaded_files = st.file_uploader(
        "Upload your documents (PDF, DOCX, or TXT)",
        type=['pdf', 'docx', 'txt'],
        accept_multiple_files=True
    )
    if uploaded_files:
        _process_uploads(uploaded_files)
        # The sidebar was drawn before ingestion; update its counters now.
        refresh_analytics()

    # Query section
    st.markdown("### 💬 Ask Questions")
    query = st.text_input("What would you like to know about your documents?")
    if query:
        _answer_query(query, k_value)

    _render_history()


def _render_sidebar():
    """Draw the sidebar and return ``(k_value, refresh_analytics)``.

    ``refresh_analytics`` rewrites the two counter lines in place so they can
    be updated after uploads have been ingested later in the same run.
    """
    with st.sidebar:
        st.image("https://via.placeholder.com/150?text=SYNAPTYX", width=150)
        st.markdown("### 🧠 SYNAPTYX")
        st.markdown("#### AI Accelerator Platform")
        st.markdown("---")
        st.markdown("### 📊 Analytics")
        docs_slot = st.empty()
        chunks_slot = st.empty()
        st.markdown("---")
        st.markdown("### 🔧 Settings")
        k_value = st.slider("Number of relevant chunks", 1, 10, 5)

        # Clear history button
        if st.button("🗑️ Clear History"):
            st.session_state.history = []
            st.success("History cleared!")

    def refresh_analytics():
        docs_slot.markdown(f"Documents Processed: {st.session_state.processed_docs}")
        chunks_slot.markdown(f"Total Chunks: {st.session_state.total_chunks}")

    refresh_analytics()
    return k_value, refresh_analytics


def _process_uploads(uploaded_files):
    """Ingest each upload once per session and show its status on every run."""
    state = st.session_state
    for file in uploaded_files:
        # Uploads stay in the widget across reruns; name + size identifies a
        # file already ingested so it is not embedded and counted again.
        file_key = (file.name, file.size)
        result = state.processed_files.get(file_key)
        if result is None:
            with st.spinner(f"Processing {file.name}..."):
                result = asyncio.run(state.demo.process_document(file))
            if result["status"] == "success":
                # Only successes are remembered, so failures are retried.
                state.processed_files[file_key] = result
                state.processed_docs += 1
                state.total_chunks += result["chunks"]

        if result["status"] == "success":
            with st.expander(f"📄 {file.name} - Details"):
                st.json(result["metadata"])
            st.success(f"Successfully processed {file.name}")
        else:
            st.error(f"Error processing {file.name}: {result['error']}")


def _answer_query(query, k_value):
    """Run *query* (or reuse the last identical run) and display the answer."""
    state = st.session_state
    # A rerun with the same question, k and document count reuses the stored
    # result instead of re-querying and appending a duplicate history entry
    # (which would also undo "Clear History" immediately).
    query_key = (query, k_value, state.processed_docs)
    if state.last_query_key == query_key:
        result = state.last_query_result
    else:
        with st.spinner("Analyzing..."):
            result = asyncio.run(state.demo.query(query, k=k_value))
        if result["status"] == "success":
            state.last_query_key = query_key
            state.last_query_result = result
            # Add to history
            state.history.append({
                "question": query,
                "answer": result["answer"],
                "confidence": result["confidence"],
                "sources": result["sources"],
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
            })

    if result["status"] != "success":
        st.error(f"Error: {result['error']}")
        return

    # Display answer
    st.markdown("#### Answer")
    st.info(result["answer"])

    col1, col2 = st.columns(2)
    with col1:
        st.markdown("##### Confidence Score")
        st.progress(_progress_value(result["confidence"]))

    with col2:
        st.markdown("##### Sources")
        for source in result["sources"]:
            st.markdown(f"- {source}")


def _progress_value(confidence):
    """Clamp *confidence* into a range ``st.progress`` accepts.

    Integers are treated as percentages (0-100), everything else as a
    fraction (0.0-1.0); out-of-range values would make ``st.progress`` raise.
    """
    if isinstance(confidence, int):
        return min(max(confidence, 0), 100)
    return min(max(float(confidence), 0.0), 1.0)


def _render_history():
    """List previous questions, newest first, each in its own expander."""
    if not st.session_state.history:
        return
    st.markdown("### 📜 History")
    for item in reversed(st.session_state.history):
        with st.expander(f"Q: {item['question']} ({item['timestamp']})"):
            st.markdown(f"**Answer:** {item['answer']}")
            st.markdown(f"**Confidence:** {item['confidence']:.2f}")
            st.markdown("**Sources:**")
            for source in item['sources']:
                st.markdown(f"- {source}")
223
 
224
  if __name__ == "__main__":
225
+ main()