cryogenic22 committed on
Commit
1447c64
Β·
verified Β·
1 Parent(s): 1a35a99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -27
app.py CHANGED
@@ -5,6 +5,7 @@ from utils.case_manager import CaseManager
5
  from utils.legal_notebook_interface import LegalNotebookInterface
6
  from datetime import datetime
7
  import os
 
8
  import nltk
9
  import spacy
10
 
@@ -16,53 +17,96 @@ st.set_page_config(
16
  initial_sidebar_state="expanded"
17
  )
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  # Initialize NLTK data directory
20
  @st.cache_resource
21
- def initialize_nltk():
22
  """Initialize NLTK with all required resources."""
23
- nltk_data_dir = os.path.join(os.getcwd(), "data", "nltk_data")
24
- os.makedirs(nltk_data_dir, exist_ok=True)
25
- nltk.data.path.append(nltk_data_dir)
 
 
 
 
 
 
 
26
 
27
- resources = ['punkt', 'averaged_perceptron_tagger', 'maxent_ne_chunker', 'words', 'stopwords']
28
  for resource in resources:
29
  try:
30
- nltk.download(resource, download_dir=nltk_data_dir, quiet=True)
 
31
  except Exception as e:
32
  st.error(f"Error downloading NLTK resource {resource}: {str(e)}")
33
 
34
  return True
35
 
36
- # Initialize components with better error handling
37
  @st.cache_resource
38
  def init_components():
39
- """Initialize all components with proper error handling and feedback."""
40
  try:
41
- # Set up data directory
42
- data_dir = os.path.join(os.getcwd(), "data")
43
- os.makedirs(data_dir, exist_ok=True)
44
-
 
45
  # Initialize NLTK with status
46
- with st.spinner("Initializing NLP components..."):
47
- initialize_nltk()
48
-
49
- # Initialize spaCy with proper error handling
 
50
  try:
51
  nlp = spacy.load("en_core_web_sm")
 
52
  except OSError:
53
- with st.spinner("Downloading required language model..."):
54
- os.system("python -m spacy download en_core_web_sm")
55
- nlp = spacy.load("en_core_web_sm")
 
56
 
57
- # Initialize components with status updates
58
- case_manager = CaseManager(base_path=os.path.join(data_dir, "cases"))
59
- vector_store = VectorStore(storage_path=os.path.join(data_dir, "vectors"))
60
- doc_processor = DocumentProcessor(base_path=data_dir)
61
 
 
 
 
62
  return case_manager, vector_store, doc_processor
63
 
64
  except Exception as e:
65
  st.error(f"Error initializing components: {str(e)}")
 
66
  raise
67
 
68
  def main():
@@ -98,6 +142,26 @@ def main():
98
  background-color: #fff3cd;
99
  color: #664d03;
100
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  </style>
102
  """, unsafe_allow_html=True)
103
 
@@ -106,8 +170,7 @@ def main():
106
 
107
  # Initialize components with error handling
108
  try:
109
- with st.spinner("Initializing application components..."):
110
- case_manager, vector_store, doc_processor = init_components()
111
  except Exception as e:
112
  st.error("Failed to initialize application components. Please try again.")
113
  st.exception(e)
@@ -184,8 +247,22 @@ def main():
184
  text, chunks, metadata = doc_processor.process_and_tag_document(uploaded_file)
185
  progress_bar.progress(50)
186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  # Prepare and add document
188
- status_text.text("Adding to case...")
189
  doc_data = {
190
  "id": metadata['doc_id'],
191
  "title": uploaded_file.name,
@@ -193,7 +270,6 @@ def main():
193
  "metadata": metadata,
194
  "chunks": chunks
195
  }
196
- progress_bar.progress(75)
197
 
198
  case_manager.add_document(case["id"], doc_data)
199
  progress_bar.progress(100)
@@ -208,6 +284,7 @@ def main():
208
 
209
  except Exception as e:
210
  st.error(f"Error processing document: {str(e)}")
 
211
  finally:
212
  # Clean up progress indicators
213
  progress_bar.empty()
@@ -216,6 +293,7 @@ def main():
216
  st.info("No cases created yet. Use the form above to create your first case.")
217
  except Exception as e:
218
  st.error(f"Error loading cases: {str(e)}")
 
219
 
220
  elif tab == "πŸ“„ Document Analysis":
221
  st.title("πŸ“„ Document Analysis")
 
5
  from utils.legal_notebook_interface import LegalNotebookInterface
6
  from datetime import datetime
7
  import os
8
+ from pathlib import Path
9
  import nltk
10
  import spacy
11
 
 
17
  initial_sidebar_state="expanded"
18
  )
19
 
20
def init_huggingface_directories():
    """Create the data directory tree used by the app.

    On Hugging Face Spaces (detected via the SPACE_ID environment
    variable) the persistent volume is mounted at /data; for local
    development we fall back to ./data under the current working
    directory.

    Returns:
        dict: mapping of directory name -> Path of the created directory.
    """
    # Choose the base directory depending on where we are running.
    running_in_space = os.environ.get('SPACE_ID')
    if running_in_space:
        # We're in a Hugging Face Space
        base_dir = Path("/data")
    else:
        # Local development
        base_dir = Path(os.getcwd()) / "data"

    # Every directory the rest of the app expects to exist.
    directories = {
        'data': base_dir,
        'cases': base_dir / "cases",
        'vectors': base_dir / "vectors",
        'nltk_data': base_dir / "nltk_data",
        'temp': base_dir / "temp",
        'logs': base_dir / "logs",
    }

    # Ensure each one exists, surfacing progress in the sidebar.
    for dir_name, dir_path in directories.items():
        dir_path.mkdir(parents=True, exist_ok=True)
        st.sidebar.write(f"βœ“ {dir_name} directory: {dir_path}")

    return directories
46
+
47
# Initialize NLTK data directory
@st.cache_resource
def initialize_nltk(nltk_data_dir):
    """Initialize NLTK with all required resources.

    Appends *nltk_data_dir* to NLTK's search path, then downloads each
    required corpus/model into it. Download failures are reported via
    st.error but do not abort the remaining downloads.

    Args:
        nltk_data_dir: directory (Path or str) to store NLTK data in.

    Returns:
        bool: always True (failures are only reported, not raised).
    """
    # Make the custom data directory visible to nltk's resource lookups.
    nltk.data.path.append(str(nltk_data_dir))

    # Corpora/models the NLP pipeline depends on.
    resources = (
        'punkt',
        'averaged_perceptron_tagger',
        'maxent_ne_chunker',
        'words',
        'stopwords',
    )

    # Fetch each resource; keep going even if one download fails.
    for resource in resources:
        try:
            nltk.download(resource, download_dir=str(nltk_data_dir), quiet=True)
            st.sidebar.write(f"βœ“ NLTK {resource}")
        except Exception as e:
            st.error(f"Error downloading NLTK resource {resource}: {str(e)}")

    return True
71
 
72
# Initialize components
@st.cache_resource
def init_components():
    """Initialize all components with proper HF Spaces directory structure.

    Creates the data directory tree, downloads NLTK resources, ensures
    the spaCy "en_core_web_sm" model is importable, and constructs the
    case/vector/document services. Progress is reported in the sidebar.

    Returns:
        tuple: (case_manager, vector_store, doc_processor)

    Raises:
        Exception: re-raised (after being shown in the UI) if any
            initialization step fails.
    """
    try:
        st.sidebar.markdown("### πŸš€ Initializing Components")

        # Initialize directory structure
        directories = init_huggingface_directories()

        # Initialize NLTK with status
        with st.sidebar.expander("πŸ“š NLTK Resources", expanded=False):
            initialize_nltk(directories['nltk_data'])

        # Initialize spaCy with proper error handling
        with st.sidebar.expander("πŸ”§ SpaCy Model", expanded=False):
            try:
                nlp = spacy.load("en_core_web_sm")
                st.write("βœ“ SpaCy model loaded")
            except OSError:
                st.write("Downloading SpaCy model...")
                # FIX: use subprocess.run with an argument list instead of
                # os.system("python -m spacy download ..."): sys.executable
                # guarantees the model is installed into the interpreter
                # actually running this app, the list form avoids shell
                # interpretation, and check=True surfaces a failed download
                # instead of silently falling through to a second OSError.
                import subprocess
                import sys
                subprocess.run(
                    [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
                    check=True,
                )
                nlp = spacy.load("en_core_web_sm")
                st.write("βœ“ SpaCy model downloaded and loaded")

        # NOTE(review): nlp is loaded here only to verify model availability
        # and is not returned — confirm downstream components load it
        # themselves.

        # Initialize components with updated paths
        case_manager = CaseManager(base_path=str(directories['cases']))
        vector_store = VectorStore(storage_path=str(directories['vectors']))
        doc_processor = DocumentProcessor(base_path=str(directories['data']))

        # Log initialization status
        st.sidebar.success("βœ… All components initialized successfully")

        return case_manager, vector_store, doc_processor

    except Exception as e:
        st.error(f"Error initializing components: {str(e)}")
        st.exception(e)
        raise
111
 
112
  def main():
 
142
  background-color: #fff3cd;
143
  color: #664d03;
144
  }
145
+ .chat-message {
146
+ padding: 1.5rem;
147
+ border-radius: 0.5rem;
148
+ margin-bottom: 1rem;
149
+ }
150
+ .user-message {
151
+ background-color: #f0f2f6;
152
+ }
153
+ .assistant-message {
154
+ background-color: #ffffff;
155
+ border: 1px solid #e0e0e0;
156
+ }
157
+ .source-reference {
158
+ margin-top: 0.5rem;
159
+ padding: 0.5rem;
160
+ background-color: #f8f9fa;
161
+ border: 1px solid #e9ecef;
162
+ border-radius: 0.25rem;
163
+ font-size: 0.875rem;
164
+ }
165
  </style>
166
  """, unsafe_allow_html=True)
167
 
 
170
 
171
  # Initialize components with error handling
172
  try:
173
+ case_manager, vector_store, doc_processor = init_components()
 
174
  except Exception as e:
175
  st.error("Failed to initialize application components. Please try again.")
176
  st.exception(e)
 
247
  text, chunks, metadata = doc_processor.process_and_tag_document(uploaded_file)
248
  progress_bar.progress(50)
249
 
250
+ # Store in vector store
251
+ status_text.text("Creating document embeddings...")
252
+ for chunk in chunks:
253
+ vector_store.add_document(
254
+ doc_id=metadata['doc_id'],
255
+ text=chunk['text'],
256
+ metadata={
257
+ **metadata,
258
+ 'chunk_index': chunk['index'],
259
+ 'total_chunks': len(chunks)
260
+ }
261
+ )
262
+ progress_bar.progress(75)
263
+
264
  # Prepare and add document
265
+ status_text.text("Saving to case...")
266
  doc_data = {
267
  "id": metadata['doc_id'],
268
  "title": uploaded_file.name,
 
270
  "metadata": metadata,
271
  "chunks": chunks
272
  }
 
273
 
274
  case_manager.add_document(case["id"], doc_data)
275
  progress_bar.progress(100)
 
284
 
285
  except Exception as e:
286
  st.error(f"Error processing document: {str(e)}")
287
+ st.exception(e)
288
  finally:
289
  # Clean up progress indicators
290
  progress_bar.empty()
 
293
  st.info("No cases created yet. Use the form above to create your first case.")
294
  except Exception as e:
295
  st.error(f"Error loading cases: {str(e)}")
296
+ st.exception(e)
297
 
298
  elif tab == "πŸ“„ Document Analysis":
299
  st.title("πŸ“„ Document Analysis")