Spaces:

Ultronprime
/

Emails2go

Build error

App Files Files Community

Ultronprime committed on Feb 21, 2025

Commit

9c74ac0

verified ·

1 Parent(s): 9c81028

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -43

app.py CHANGED Viewed

@@ -2,23 +2,23 @@ import os
 import gradio as gr
 import logging
 import numpy as np
-from transformers import AutoModel, AutoTokenizer
 from sentence_transformers import SentenceTransformer
 import torch
 from torch.cuda.amp import autocast
 from spaces import GPU
-# Constants
 EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 CACHE_DIR = os.getenv("CACHE_DIR", "/tmp/cache")
 PERSISTENT_PATH = os.getenv("PERSISTENT_PATH", "/tmp/data")
-HF_TOKEN = "YOUR_HF_TOKEN"  # Replace with your Hugging Face token
-# Create directories
 os.makedirs(CACHE_DIR, exist_ok=True)
 os.makedirs(PERSISTENT_PATH, exist_ok=True)
-# Logging Setup
 LOG_DIR = os.getenv("LOG_DIR", "/data/logs")
 os.makedirs(LOG_DIR, exist_ok=True)
 LOG_FILE = LOG_DIR + "/app.log"
@@ -47,35 +47,39 @@ def initialize_model():
 @GPU()
 def generate_embedding(text, focus):
     global model
-    if model is None:
-        initialize_model()
     try:
         with autocast("cuda"):
-            embedding = model.encode([text])[0].tolist()
-        return embedding, ""
     except Exception as e:
         error_msg = f"Error generating embedding: {str(e)}"
         logger.error(error_msg)
         return "", error_msg
 @GPU()
-def save_embedding(embedding, name):
     try:
-        np.save(f"{PERSISTENT_PATH}/{name}.npy", np.array(embedding))
-        return f"Embedding saved as {name}.npy"
     except Exception as e:
         error_msg = f"Error saving embedding: {str(e)}"
         logger.error(error_msg)
         return error_msg
 @GPU()
-def convert_to_json(embedding, name):
     try:
-        import json
-        with open(f"{PERSISTENT_PATH}/{name}.json", "w") as f:
-            json.dump(embedding, f)
-        return f"Embedding saved as {name}.json"
     except Exception as e:
         error_msg = f"Error converting to JSON: {str(e)}"
         logger.error(error_msg)
@@ -84,23 +88,37 @@ def convert_to_json(embedding, name):
 @GPU()
 def process_files(files, focus):
     global model
-    if model is None:
-        initialize_model()
     try:
         all_embeddings = []
         for file in files:
-            with open(file.name, 'r') as f:
-                text = f.read()
-            with autocast("cuda"):
-                embedding = model.encode([text])[0].tolist()
-            all_embeddings.append(embedding)
-        return all_embeddings, ""
     except Exception as e:
-        error_msg = f"Error processing files: {str(e)}"
         logger.error(error_msg)
         return "", error_msg
 def create_gradio_interface():
     with gr.Blocks() as demo:
         gr.Markdown("## Text Embedding Generator")
@@ -113,41 +131,45 @@ def create_gradio_interface():
             file_input = gr.File(label="Upload Files", file_count="multiple")
         generate_button = gr.Button("Generate Embedding")
-        embedding_output = gr.Textbox(label="Embedding Vector", lines=5)
-        error_box = gr.Textbox(label="Status/Error Messages")
-        save_name_input = gr.Textbox(label="Save Embedding As")
-        save_button = gr.Button("Save Embedding")
-        save_status = gr.Textbox(label="Save Status")
-        convert_button = gr.Button("Convert to JSON")
-        convert_status = gr.Textbox(label="Convert Status")
-        download_button = gr.Button("Download JSON")
-        download_output = gr.File(label="Download JSON")
         process_button = gr.Button("Process Files")
-        process_output = gr.Textbox(label="Processed Files", lines=5)
         generate_button.click(
             generate_embedding,
             inputs=[text_input, focus_input],
-            outputs=[embedding_output, error_box]
         )
         save_button.click(
             save_embedding,
-            inputs=[embedding_output, save_name_input],
             outputs=[save_status]
         )
         convert_button.click(
             convert_to_json,
-            inputs=[embedding_output, save_name_input],
             outputs=[convert_status]
         )
         download_button.click(
-            lambda name: f"{PERSISTENT_PATH}/{name}.json",
             inputs=[save_name_input],
             outputs=[download_output]
         )
@@ -155,7 +177,7 @@ def create_gradio_interface():
         process_button.click(
             process_files,
             inputs=[file_input, focus_input],
-            outputs=[process_output, error_box]
         )
     return demo

 import gradio as gr
 import logging
 import numpy as np
 from sentence_transformers import SentenceTransformer
 import torch
 from torch.cuda.amp import autocast
 from spaces import GPU
+import json  # Import json for direct JSON output in UI
+# Constants (Keep your HF token secure - use environment variables if possible for real deployments)
 EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 CACHE_DIR = os.getenv("CACHE_DIR", "/tmp/cache")
 PERSISTENT_PATH = os.getenv("PERSISTENT_PATH", "/tmp/data")
+HF_TOKEN = "YOUR_HF_TOKEN"  # REMEMBER TO REPLACE THIS - BEST TO USE ENVIRONMENT VARIABLE
+# Create directories (still useful to try, even if /tmp/ is ephemeral)
 os.makedirs(CACHE_DIR, exist_ok=True)
 os.makedirs(PERSISTENT_PATH, exist_ok=True)
+# Logging Setup (keep logging - it's helpful for debugging)
 LOG_DIR = os.getenv("LOG_DIR", "/data/logs")
 os.makedirs(LOG_DIR, exist_ok=True)
 LOG_FILE = LOG_DIR + "/app.log"
 @GPU()
 def generate_embedding(text, focus):
     """Encode *text* with the shared model and return it as a JSON string.

     Args:
         text: The string to embed.
         focus: Accepted for interface compatibility; not used here.

     Returns:
         A ``(embedding_json, message)`` pair. On success ``embedding_json``
         is the embedding vector serialized as a JSON list and ``message`` is
         ``""``. On failure ``embedding_json`` is ``""`` and ``message``
         describes the error (which is also logged).
     """
     global model
     if not text or not text.strip():
         # Report blank input instead of silently embedding an empty string.
         return "", "Error generating embedding: input text is empty"
     try:
         # Loading happens inside the try so a model-load failure is reported
         # through the returned message like every other error.
         if model is None:
             initialize_model()
         # torch.cuda.amp.autocast takes ``enabled`` as its first positional
         # argument, so autocast("cuda") only worked because the string is
         # truthy. torch.autocast takes the device type explicitly.
         with torch.autocast(device_type="cuda"):
             embedding_vector = model.encode([text])[0].tolist()
         # The UI textbox displays (and later re-parses) a JSON string.
         return json.dumps(embedding_vector), ""
     except Exception as e:
         error_msg = f"Error generating embedding: {str(e)}"
         logger.error(error_msg)
         return "", error_msg
 @GPU()
 def save_embedding(embedding_json, name):
     """Save an embedding as ``<PERSISTENT_PATH>/<name>.npy``.

     Args:
         embedding_json: The embedding as a JSON string (as produced for the
             UI textbox).
         name: Target file name without extension, typed by the user.

     Returns:
         A status string: the saved path on success, or a message starting
         with ``"Error saving embedding:"`` on failure (also logged).
     """
     try:
         # ``name`` comes straight from a UI textbox: keep only the final path
         # component so input such as "../../x" cannot escape PERSISTENT_PATH.
         safe_name = os.path.basename((name or "").strip())
         if not safe_name or safe_name in (".", ".."):
             raise ValueError("a file name is required")
         if not embedding_json:
             raise ValueError("no embedding to save; generate one first")
         embedding = json.loads(embedding_json)  # JSON string back to a list
         filepath = os.path.join(PERSISTENT_PATH, f"{safe_name}.npy")
         np.save(filepath, np.array(embedding))
         return f"Embedding saved to: {filepath}"
     except Exception as e:
         error_msg = f"Error saving embedding: {str(e)}"
         logger.error(error_msg)
         return error_msg
 @GPU()
+def convert_to_json(embedding_json, name): # Expect JSON string as input
     try:
+        filepath = f"{PERSISTENT_PATH}/{name}.json" # Construct full filepath
+        with open(filepath, "w") as f:
+            f.write(embedding_json) # Directly write the JSON string
+        return f"Embedding saved as JSON to: {filepath}" # Return filepath in status
     except Exception as e:
         error_msg = f"Error converting to JSON: {str(e)}"
         logger.error(error_msg)
 @GPU()
 def process_files(files, focus):
     """Embed every uploaded file and report a per-file status.

     Args:
         files: The uploaded files, or ``None``/empty when nothing was
             uploaded. Each item may be an object exposing the path as
             ``.name`` or a plain path string.
         focus: Accepted for interface compatibility; not used here.

     Returns:
         A ``(embeddings_json, status)`` pair. ``embeddings_json`` is a JSON
         list with one embedding per successfully processed file, and
         ``status`` has one line per file. If the function fails as a whole,
         ``embeddings_json`` is ``""`` and ``status`` holds the error message.
     """
     global model
     if not files:
         # Nothing uploaded: give a readable message instead of a TypeError
         # from iterating None.
         return "", "No files were uploaded."
     try:
         if model is None:
             initialize_model()
         all_embeddings = []
         file_statuses = []  # One status line per input file
         for file in files:
             # NOTE(review): depending on the Gradio version/configuration an
             # upload arrives either as an object with ``.name`` or as a path
             # string; accept both so the error handler below cannot itself
             # fail on ``file.name``.
             path = getattr(file, "name", file)
             try:
                 with open(path, "r", encoding="utf-8") as f:
                     text = f.read()
                 # torch.autocast takes the device type explicitly; the
                 # torch.cuda.amp variant treats its first argument as
                 # ``enabled``.
                 with torch.autocast(device_type="cuda"):
                     embedding = model.encode([text])[0].tolist()
                 all_embeddings.append(embedding)
                 file_statuses.append(f"File '{path}' processed successfully.")
             except Exception as file_e:
                 # A bad file must not abort the rest of the batch.
                 error_msg = f"Error processing file '{path}': {str(file_e)}"
                 logger.error(error_msg)
                 file_statuses.append(error_msg)
         status_message = "\n".join(file_statuses)
         # Embeddings are returned as a JSON string for display in the UI
         # (may be long when many or large files are processed).
         all_embeddings_json = json.dumps(all_embeddings)
         return all_embeddings_json, status_message
     except Exception as e:
         error_msg = f"Error in process_files function: {str(e)}"
         logger.error(error_msg)
         return "", error_msg
 def create_gradio_interface():
     with gr.Blocks() as demo:
         gr.Markdown("## Text Embedding Generator")
             file_input = gr.File(label="Upload Files", file_count="multiple")
         generate_button = gr.Button("Generate Embedding")
+        embedding_output = gr.Textbox(label="Embedding Vector (JSON)", lines=5) # Label changed to JSON
+        status_box = gr.Textbox(label="Status/Messages") # Renamed error_box to status_box
+        with gr.Accordion("Save and Download Options", open=False): # Accordion for save/download options
+            save_name_input = gr.Textbox(label="Save Embedding As (Name without extension)")
+            with gr.Row():
+                save_button = gr.Button("Save as .npy")
+                convert_button = gr.Button("Save as .json")
+            with gr.Row():
+                save_status = gr.Textbox(label="Save Status")
+                convert_status = gr.Textbox(label="Convert Status")
+            download_button = gr.Button("Download JSON")
+            download_output = gr.File(label="Download JSON File")
         process_button = gr.Button("Process Files")
+        process_output = gr.Textbox(label="Processed Files (Embeddings JSON - limited display)", lines=3) # Limited lines for process output
+        process_status = gr.Textbox(label="File Processing Status") # Status for file processing
         generate_button.click(
             generate_embedding,
             inputs=[text_input, focus_input],
+            outputs=[embedding_output, status_box] # Renamed error_box to status_box
         )
         save_button.click(
             save_embedding,
+            inputs=[embedding_output, save_name_input], # Input is now embedding_output (JSON string)
             outputs=[save_status]
         )
         convert_button.click(
             convert_to_json,
+            inputs=[embedding_output, save_name_input], # Input is embedding_output (JSON string)
             outputs=[convert_status]
         )
         download_button.click(
+            lambda name: f"{PERSISTENT_PATH}/{name}.json" if name else None, # Handle empty name
             inputs=[save_name_input],
             outputs=[download_output]
         )
         process_button.click(
             process_files,
             inputs=[file_input, focus_input],
+            outputs=[process_output, process_status] # outputs for process_files
         )
     return demo