Spaces:

deenasun
/

ai-sl-api

Running

App Files Files Community

deenasun committed on Jun 22, 2025

Commit

bf3d9ee

1 Parent(s): 03ba989

fix gradio interface

Browse files

Files changed (1) hide show

app.py +107 -155

app.py CHANGED Viewed

@@ -23,7 +23,8 @@ R2_ACCESS_KEY_ID = os.environ.get("R2_ACCESS_KEY_ID")
 R2_SECRET_ACCESS_KEY = os.environ.get("R2_SECRET_ACCESS_KEY")
 # Validate that required environment variables are set
-if not all([R2_ASL_VIDEOS_URL, R2_ENDPOINT, R2_ACCESS_KEY_ID, R2_SECRET_ACCESS_KEY]):
     raise ValueError(
         "Missing required R2 environment variables. "
         "Please check your .env file."
@@ -54,15 +55,17 @@ s3 = session.client(
 )
 def clean_gloss_token(token):
-    """
-    Clean a gloss token by removing brackets, newlines, and extra whitespace
-    """
-    # Remove brackets and newlines
-    cleaned = re.sub(r'[\[\]\n\r]', '', token)
     # Remove extra whitespace
     cleaned = re.sub(r'\s+', ' ', cleaned).strip()
-    cleaned = cleaned.lower()
-    return cleaned
 def upload_video_to_r2(video_path, bucket_name="asl-videos"):
@@ -84,8 +87,10 @@ def upload_video_to_r2(video_path, bucket_name="asl-videos"):
             )
         # Replace the endpoint with the domain for uploading
-        public_domain = R2_ENDPOINT.replace('https://', '').split('.')[0]
-        video_url = f"https://{public_domain}.r2.cloudflarestorage.com/{bucket_name}/{unique_filename}"
         print(f"Video uploaded to R2: {video_url}")
         public_video_url = f"{R2_ASL_VIDEOS_URL}/{unique_filename}"
@@ -150,52 +155,24 @@ def cleanup_temp_video(file_path):
         print(f"Error cleaning up file: {e}")
-def process_text_to_gloss(text):
-    """
-    Convert text directly to ASL gloss
-    """
-    try:
-        # For text input, we can use a simpler approach or call the
-        # document converter with a temporary text file
-        import tempfile
-        # Create a temporary text file
-        with tempfile.NamedTemporaryFile(
-            mode='w', suffix='.txt', delete=False
-        ) as temp_file:
-            temp_file.write(text)
-            temp_file_path = temp_file.name
-        # Use the existing document converter
-        gloss = asl_converter.convert_document(temp_file_path)
-        # Clean up the temporary file
-        os.unlink(temp_file_path)
-        return gloss
-    except Exception as e:
-        print(f"Error processing text: {e}")
-        return None
 def process_input(input_data):
-    """
-    Process either text input or file upload
-    input_data can be either a string (text) or a file object
-    """
-    if input_data is None:
-        return None
-    # Check if it's a file object (has .name attribute)
-    if hasattr(input_data, 'name'):
-        # It's a file upload
-        print(f"Processing file: {input_data.name}")
-        return asl_converter.convert_document(input_data.name)
     else:
-        # It's text input
-        print(f"Processing text input: "
-              f"{input_data[:100]}...")
-        return process_text_to_gloss(input_data)
 async def parse_vectorize_and_search_unified(input_data):
@@ -210,7 +187,7 @@ async def parse_vectorize_and_search_unified(input_data):
         return {
             "status": "error",
             "message": "Failed to process input"
-        }, None, ""
     print("ASL", gloss)
@@ -264,44 +241,25 @@ async def parse_vectorize_and_search_unified(input_data):
         stitched_video_path = video_files[0]
     # Upload final video to R2 and get public URL
-    final_video_url = None
     if stitched_video_path:
-        final_video_url = upload_video_to_r2(stitched_video_path)
-        # Clean up the local file after upload
-        cleanup_temp_video(stitched_video_path)
     # Clean up individual video files after stitching
     for video_file in video_files:
         if video_file != stitched_video_path:  # Don't delete the final output
             cleanup_temp_video(video_file)
-    # Create download link HTML
-    download_html = ""
-    if final_video_url:
-        download_html = f"""
-        <div style="text-align: center; padding: 20px;">
-            <h3>Download Your ASL Video</h3>
-            <a href="{final_video_url}" download="asl_video.mp4"
-               style="background-color: #4CAF50; color: white;
-                      padding: 12px 24px; text-decoration: none;
-                      border-radius: 4px; display: inline-block;">
-                Download Video
-            </a>
-            <p style="margin-top: 10px; color: #666;">
-                <small>Right-click and "Save As" if the download doesn't
-                       start automatically</small>
-            </p>
-        </div>
-        """
     return {
         "status": "success",
         "videos": videos,
         "video_count": len(videos),
         "gloss": gloss,
         "cleaned_tokens": cleaned_tokens,
-        "final_video_url": final_video_url
-    }, final_video_url, download_html
 def parse_vectorize_and_search_unified_sync(input_data):
@@ -317,10 +275,35 @@ def predict_unified(input_data):
             return {
                 "status": "error",
                 "message": "Please provide text or upload a document"
-            }, None, ""
         # Use the unified processing function
         result = parse_vectorize_and_search_unified_sync(input_data)
         return result
     except Exception as e:
@@ -328,90 +311,59 @@ def predict_unified(input_data):
         return {
             "status": "error",
             "message": f"An error occurred: {str(e)}"
-        }, None, ""
 # Create the Gradio interface
 def create_interface():
     """Create and configure the Gradio interface"""
-    with gr.Blocks(title=title) as demo:
-        gr.Markdown(f"# {title}")
-        gr.Markdown(description)
-        with gr.Row():
-            with gr.Column():
-                # Input section
-                gr.Markdown("## Input Options")
-                # Text input
-                gr.Markdown("### Option 1: Enter Text")
-                text_input = gr.Textbox(
-                    label="Enter text to convert to ASL",
-                    placeholder="Type or paste your text here...",
-                    lines=5,
-                    max_lines=10
-                )
-                gr.Markdown("### Option 2: Upload Document")
-                file_input = gr.File(
-                    label="Upload Document (pdf, txt, docx, or epub)",
-                    file_types=[".pdf", ".txt", ".docx", ".epub"]
-                )
-                # Processing options
-                gr.Markdown("## Processing Options")
-                use_r2 = gr.Checkbox(
-                    label="Use Cloud Storage (R2)",
-                    value=True,
-                    info=("Upload video to cloud storage for "
-                          "persistent access")
-                )
-                process_btn = gr.Button(
-                    "Generate ASL Video",
-                    variant="primary"
-                )
-            with gr.Column():
-                # Output section
-                gr.Markdown("## Results")
-                json_output = gr.JSON(label="Processing Results")
-                video_output = gr.Video(label="ASL Video Output")
-                download_html = gr.HTML(label="Download Link")
-        # Handle the processing
-        def process_inputs(text, file, use_r2_storage):
-            # Determine which input to use
-            if text and text.strip():
-                # Use text input
-                input_data = text.strip()
-            elif file is not None:
-                # Use file input
-                input_data = file
-            else:
-                # No input provided
-                return {
-                    "status": "error",
-                    "message": "Please provide either text or upload a file"
-                }, None, ""
-            # Process using the unified function
-            return predict_unified(input_data)
-        process_btn.click(
-            fn=process_inputs,
-            inputs=[text_input, file_input, use_r2],
-            outputs=[json_output, video_output, download_html]
-        )
-        # Footer
-        gr.Markdown(article)
-    return demo
-# For Hugging Face Spaces, use the Blocks interface
 if __name__ == "__main__":
     demo = create_interface()
     demo.launch(

 R2_SECRET_ACCESS_KEY = os.environ.get("R2_SECRET_ACCESS_KEY")
 # Validate that required environment variables are set
+if not all([R2_ASL_VIDEOS_URL, R2_ENDPOINT, R2_ACCESS_KEY_ID,
+            R2_SECRET_ACCESS_KEY]):
     raise ValueError(
         "Missing required R2 environment variables. "
         "Please check your .env file."
 )
 def clean_gloss_token(token):
     """Normalise a single gloss token.

     Every character that is neither a word character nor whitespace is
     dropped, the result is lowercased, and runs of whitespace are collapsed
     to one space. Returns ``None`` when ``token`` is falsy or when nothing
     is left after cleaning.
     """
     if not token:
         return None

     # Drop punctuation first, then normalise case and outer whitespace.
     without_punctuation = re.sub(r'[^\w\s]', '', token)
     normalised = without_punctuation.lower().strip()

     # Collapse any inner whitespace run (spaces, tabs, newlines) to a space.
     normalised = re.sub(r'\s+', ' ', normalised).strip()

     if not normalised:
         return None
     return normalised
 def upload_video_to_r2(video_path, bucket_name="asl-videos"):
             )
         # Replace the endpoint with the domain for uploading
+        public_domain = (R2_ENDPOINT.replace('https://', '')
+                         .split('.')[0])
+        video_url = (f"https://{public_domain}.r2.cloudflarestorage.com/"
+                     f"{bucket_name}/{unique_filename}")
         print(f"Video uploaded to R2: {video_url}")
         public_video_url = f"{R2_ASL_VIDEOS_URL}/{unique_filename}"
         print(f"Error cleaning up file: {e}")
 def process_input(input_data):
     """Turn raw user input into the text handed on for ASL conversion.

     A string is returned with its surrounding whitespace stripped. An
     object exposing a ``name`` attribute is treated as an uploaded file:
     its ``name`` is passed to ``asl_converter.convert_document`` and the
     result is returned. Any exception raised while handling the file is
     printed and ``None`` is returned. Every other input type is reported
     and also yields ``None``.
     """
     # Direct text input
     if isinstance(input_data, str):
         return input_data.strip()

     # Anything that is neither text nor file-like is rejected up front.
     if not hasattr(input_data, 'name'):
         print(f"Unsupported input type: {type(input_data)}")
         return None

     # File input - the converter receives the file's name
     try:
         print(f"Processing file: {input_data.name}")
         gloss = asl_converter.convert_document(input_data.name)
         # Only a preview is logged; slicing a non-sliceable result raises
         # here and is reported by the handler below.
         print(f"Converted gloss: {gloss[:100]}...")
     except Exception as e:
         print(f"Error processing file: {e}")
         return None
     return gloss
 async def parse_vectorize_and_search_unified(input_data):
         return {
             "status": "error",
             "message": "Failed to process input"
+        }, None
     print("ASL", gloss)
         stitched_video_path = video_files[0]
     # Upload final video to R2 and get public URL
+    video_download_url = None
     if stitched_video_path:
+        video_download_url = upload_video_to_r2(stitched_video_path)
+        # Don't clean up the local file yet - let frontend use it first
     # Clean up individual video files after stitching
     for video_file in video_files:
         if video_file != stitched_video_path:  # Don't delete the final output
             cleanup_temp_video(video_file)
+    # Return simplified results
     return {
         "status": "success",
         "videos": videos,
         "video_count": len(videos),
         "gloss": gloss,
         "cleaned_tokens": cleaned_tokens,
+        "video_download_url": video_download_url
+    }, stitched_video_path
 def parse_vectorize_and_search_unified_sync(input_data):
             return {
                 "status": "error",
                 "message": "Please provide text or upload a document"
+            }, None
         # Use the unified processing function
         result = parse_vectorize_and_search_unified_sync(input_data)
+        # Get the results
+        json_data, local_video_path = result
+        # If we have a local video path, use it directly for Gradio
+        if local_video_path and json_data.get("status") == "success":
+            # Schedule cleanup of the video file after a delay
+            # This gives Gradio time to load and display the video
+            import threading
+            import time
+            def delayed_cleanup(video_path):
+                time.sleep(30)  # Wait 30 seconds before cleanup
+                cleanup_temp_video(video_path)
+            # Start cleanup thread
+            cleanup_thread = threading.Thread(
+                target=delayed_cleanup,
+                args=(local_video_path,)
+            )
+            cleanup_thread.daemon = True
+            cleanup_thread.start()
+            return json_data, local_video_path
         return result
     except Exception as e:
         return {
             "status": "error",
             "message": f"An error occurred: {str(e)}"
+        }, None
 # Create the Gradio interface
 def create_interface():
     """Build the Gradio ``Interface`` for the text/document to ASL demo.

     The interface takes a textbox and a file upload as inputs and shows a
     JSON panel and a video player as outputs. The returned object is the
     configured ``gr.Interface``.
     """

     def process_inputs(text, file):
         """Pick the active input and forward it to ``predict_unified``.

         Non-blank text takes priority over an uploaded file. With neither
         present, an error payload and ``None`` are returned without calling
         ``predict_unified``.
         """
         stripped_text = text.strip() if text else ""
         if stripped_text:
             return predict_unified(stripped_text)
         if file is not None:
             return predict_unified(file)
         # No input provided
         return {
             "status": "error",
             "message": "Please provide either text or upload a file"
         }, None

     # Input components
     text_box = gr.Textbox(
         label="Enter text to convert to ASL",
         placeholder="Type or paste your text here...",
         lines=5
     )
     document_upload = gr.File(
         label="Upload Document (pdf, txt, docx, or epub)",
         file_types=[".pdf", ".txt", ".docx", ".epub"]
     )

     # Output components
     results_panel = gr.JSON(label="Results")
     video_player = gr.Video(label="ASL Video")

     return gr.Interface(
         fn=process_inputs,
         inputs=[text_box, document_upload],
         outputs=[results_panel, video_player],
         title=title,
         description=description,
         article=article
     )
+# For Hugging Face Spaces, use the Interface
 if __name__ == "__main__":
     demo = create_interface()
     demo.launch(