Ultronprime committed on
Commit
e2228da
·
verified ·
1 Parent(s): 160e875

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -18
app.py CHANGED
@@ -17,6 +17,7 @@ import requests
17
  from charset_normalizer import from_bytes
18
  import zipfile
19
  import tempfile
 
20
 
21
  # Custom Exception Class
22
  class GPUQuotaExceededError(Exception):
@@ -27,16 +28,20 @@ EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
27
  CHUNK_SIZE = 500
28
  BATCH_SIZE = 32
29
 
30
- # Persistent storage directories
31
  PERSISTENT_PATH = os.getenv("PERSISTENT_PATH", "/data")
32
  os.makedirs(PERSISTENT_PATH, exist_ok=True, mode=0o777)
33
 
 
34
  TEMP_DIR = os.path.join(PERSISTENT_PATH, "temp")
35
  os.makedirs(TEMP_DIR, exist_ok=True, mode=0o777)
36
 
37
  OUTPUTS_DIR = os.path.join(PERSISTENT_PATH, "outputs")
38
  os.makedirs(OUTPUTS_DIR, exist_ok=True, mode=0o777)
39
 
 
 
 
40
  LOG_DIR = os.getenv("LOG_DIR", os.path.join(PERSISTENT_PATH, "logs"))
41
  os.makedirs(LOG_DIR, exist_ok=True, mode=0o777)
42
 
@@ -142,8 +147,11 @@ def process_files(files):
142
  all_embeddings = []
143
  for i in range(0, len(all_chunks), BATCH_SIZE):
144
  batch = all_chunks[i:i+BATCH_SIZE]
145
- embeddings = handle_gpu_operation(lambda: get_model().encode(batch)) if model else []
146
- all_embeddings.extend(embeddings)
 
 
 
147
 
148
  # Save results to OUTPUTS_DIR
149
  embeddings_path = os.path.join(OUTPUTS_DIR, "embeddings.npy")
@@ -168,9 +176,8 @@ def process_files(files):
168
  def semantic_search(query, top_k=5):
169
  global model
170
  if model is None:
171
- if not initialize_model():
172
- return "Model initialization failed. Please try again."
173
-
174
  try:
175
  # Load saved embeddings from OUTPUTS_DIR
176
  stored_embeddings = np.load(os.path.join(OUTPUTS_DIR, "embeddings.npy"))
@@ -182,7 +189,7 @@ def semantic_search(query, top_k=5):
182
 
183
  # Get query embedding
184
  if model:
185
- query_embedding = handle_gpu_operation(lambda: get_model().encode([query]))[0]
186
  else:
187
  return "Model not initialized. Please process files first."
188
 
@@ -216,20 +223,20 @@ def search_and_format(query, num_results):
216
 
217
  def browse_outputs():
218
  try:
219
- # Attempt to open the OUTPUTS_DIR
220
- os.startfile(OUTPUTS_DIR)
221
- return "Opened outputs directory successfully"
222
  except Exception as e:
223
  logger.error(f"Error opening file browser: {str(e)}")
224
  return "Error opening file browser"
225
 
226
- def download_results_from_disk():
227
  try:
228
  output_files = [
229
  os.path.join(OUTPUTS_DIR, "embeddings.npy"),
230
  os.path.join(OUTPUTS_DIR, "chunks.txt")
231
  ]
232
 
 
233
  with tempfile.TemporaryDirectory() as temp_dir:
234
  zip_path = os.path.join(temp_dir, "results.zip")
235
  with zipfile.ZipFile(zip_path, 'w') as zipf:
@@ -239,7 +246,7 @@ def download_results_from_disk():
239
  return zip_path
240
  except Exception as e:
241
  logger.error(f"Error creating download: {str(e)}")
242
- return "Error creating download file"
243
 
244
  def create_gradio_interface():
245
  with gr.Blocks() as demo:
@@ -282,16 +289,14 @@ def create_gradio_interface():
282
  outputs=results_output
283
  )
284
 
285
- # Download Results Button
286
- download_results_button = gr.Button("⬇️ Download Results")
287
- download_results_button.click(
288
- fn=download_results_from_disk,
289
  outputs=[gr.File(label="Download Results")]
290
  )
291
 
292
  with gr.Tab("Outputs"):
293
- # Browse Outputs Button
294
- browse_button = gr.Button("📁 Browse Outputs", variant="primary")
295
  browse_button.click(
296
  fn=browse_outputs,
297
  outputs=None
 
17
  from charset_normalizer import from_bytes
18
  import zipfile
19
  import tempfile
20
+ import webbrowser
21
 
22
  # Custom Exception Class
23
  class GPUQuotaExceededError(Exception):
 
28
  CHUNK_SIZE = 500
29
  BATCH_SIZE = 32
30
 
31
+ # Set Persistent Storage Path
32
  PERSISTENT_PATH = os.getenv("PERSISTENT_PATH", "/data")
33
  os.makedirs(PERSISTENT_PATH, exist_ok=True, mode=0o777)
34
 
35
+ # Define Subdirectories
36
  TEMP_DIR = os.path.join(PERSISTENT_PATH, "temp")
37
  os.makedirs(TEMP_DIR, exist_ok=True, mode=0o777)
38
 
39
  OUTPUTS_DIR = os.path.join(PERSISTENT_PATH, "outputs")
40
  os.makedirs(OUTPUTS_DIR, exist_ok=True, mode=0o777)
41
 
42
+ NPY_CACHE = os.path.join(PERSISTENT_PATH, "npy_cache")
43
+ os.makedirs(NPY_CACHE, exist_ok=True, mode=0o777)
44
+
45
  LOG_DIR = os.getenv("LOG_DIR", os.path.join(PERSISTENT_PATH, "logs"))
46
  os.makedirs(LOG_DIR, exist_ok=True, mode=0o777)
47
 
 
147
  all_embeddings = []
148
  for i in range(0, len(all_chunks), BATCH_SIZE):
149
  batch = all_chunks[i:i+BATCH_SIZE]
150
+ if model:
151
+ embeddings = handle_gpu_operation(lambda: model.encode(batch))
152
+ all_embeddings.extend(embeddings)
153
+ else:
154
+ return "Model not initialized. Please check model initialization.", "", ""
155
 
156
  # Save results to OUTPUTS_DIR
157
  embeddings_path = os.path.join(OUTPUTS_DIR, "embeddings.npy")
 
176
  def semantic_search(query, top_k=5):
177
  global model
178
  if model is None:
179
+ return "Model not initialized. Please process files first."
180
+
 
181
  try:
182
  # Load saved embeddings from OUTPUTS_DIR
183
  stored_embeddings = np.load(os.path.join(OUTPUTS_DIR, "embeddings.npy"))
 
189
 
190
  # Get query embedding
191
  if model:
192
+ query_embedding = model.encode([query])[0]
193
  else:
194
  return "Model not initialized. Please process files first."
195
 
 
223
 
224
  def browse_outputs():
225
  try:
226
+ webbrowser.open(f"file://{OUTPUTS_DIR}")
227
+ return "Opened outputs directory"
 
228
  except Exception as e:
229
  logger.error(f"Error opening file browser: {str(e)}")
230
  return "Error opening file browser"
231
 
232
+ def download_results():
233
  try:
234
  output_files = [
235
  os.path.join(OUTPUTS_DIR, "embeddings.npy"),
236
  os.path.join(OUTPUTS_DIR, "chunks.txt")
237
  ]
238
 
239
+ # Create a temporary zip file
240
  with tempfile.TemporaryDirectory() as temp_dir:
241
  zip_path = os.path.join(temp_dir, "results.zip")
242
  with zipfile.ZipFile(zip_path, 'w') as zipf:
 
246
  return zip_path
247
  except Exception as e:
248
  logger.error(f"Error creating download: {str(e)}")
249
+ return None
250
 
251
  def create_gradio_interface():
252
  with gr.Blocks() as demo:
 
289
  outputs=results_output
290
  )
291
 
292
+ download_button = gr.Button("⬇️ Download Results")
293
+ download_button.click(
294
+ fn=download_results,
 
295
  outputs=[gr.File(label="Download Results")]
296
  )
297
 
298
  with gr.Tab("Outputs"):
299
+ browse_button = gr.Button("📁 Browse Outputs")
 
300
  browse_button.click(
301
  fn=browse_outputs,
302
  outputs=None