jfforero commited on
Commit
ef95675
·
verified ·
1 Parent(s): b08871c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -77
app.py CHANGED
@@ -907,93 +907,87 @@ custom_css = """
907
  }
908
  """
909
  # Create the Gradio interface with proper output handling
910
- with gr.Blocks(title="Affective Virtual Environments - Chunked Processing", css=custom_css) as interface:
911
  gr.Markdown("# Bello")
912
  gr.Markdown(
913
  """
914
- ***Bello*** explores the affective nuances of the human voice.
915
- Using multimodal speech emotion recognition techniques, the project analyzes acoustic, prosodic,
916
- and semantic parameters of spoken language to generate immersive 360° virtual environments.
917
 
918
- ### How to interact
919
 
920
- 1. Record your voice saying whatever you want — a poem, a song, a story, or any sentence that comes to mind.
921
- 2. Set the length to split your recording into chunks.
922
- 3. Check the box if you want to generate audio for each chunk.
923
- 4. Generate your Affective Virtual Environment and wait for the results.
924
- 5. Download the HTML file.
925
- 6. Open your creation with any web browser.
926
 
927
  ---
928
- **Learn more:**
929
- • Video Tutorial: [How to Use this Space](https://youtu.be/eVD1lzwVhi8)
930
 
931
- For more information about the project, visit: [www.emotional-machines.com](https://www.emotional-machines.com)
932
  """
933
  )
934
 
935
 
936
  with gr.Row():
937
  with gr.Column(scale=2):
938
- audio_input = gr.Audio(label="Input Audio", type="filepath", sources=["microphone", "upload"])
939
 
940
- # Add example audio selection
941
- # example_selector = gr.Dropdown(
942
- # label="Select Example Audio",
943
- # choices=["Happy Speech", "Sad Story", "Neutral News"],
944
- # value=None,
945
- # info="Choose from pre-recorded example speeches"
946
- # )
947
 
948
- # Add button to load selected example
949
- #load_example_btn = gr.Button("Load Example", variant="secondary")
950
 
951
  with gr.Column(scale=1):
952
- # Add chunk duration input
953
  chunk_duration_input = gr.Number(
954
- label="Chunk Duration (seconds)",
955
  value=10,
956
  minimum=1,
957
  maximum=60,
958
  step=1,
959
- info="Duration of each audio segment to process (1-60 seconds)"
960
  )
961
- # Add checkbox for audio generation
962
  generate_audio_checkbox = gr.Checkbox(
963
- label="Generate Audio (may take longer)",
964
  value=False,
965
- info="Uncheck to skip music generation and speed up processing"
966
  )
967
  with gr.Row():
968
- process_btn = gr.Button("Generate", variant="primary")
969
- clear_btn = gr.Button("Clear All", variant="secondary")
970
 
971
- # Add a loading indicator
972
  loading_indicator = gr.HTML("""
973
  <div id="loading" style="display: none; text-align: center; margin: 20px;">
974
- <p style="font-size: 18px; color: #4a4a4a;">Processing audio chunks...</p>
975
  <div style="border: 4px solid #f3f3f3; border-top: 4px solid #3498db; border-radius: 50%; width: 30px; height: 30px; animation: spin 2s linear infinite; margin: 0 auto;"></div>
976
  <style>@keyframes spin { 0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); } }</style>
977
  </div>
978
  """)
979
 
980
- # Create output components for each chunk type
981
  output_containers = []
982
- group_components = [] # Store group components separately
983
 
984
- # We'll create up to 20 chunk slots to accommodate different chunk durations
985
  for i in range(20):
986
  with gr.Group(visible=False) as chunk_group:
987
- gr.Markdown(f"### Chunk {i+1} Results")
988
  with gr.Row():
989
- emotion_output = gr.Label(label="Acoustic Emotion Prediction")
990
- transcription_output = gr.Label(label="Transcribed Text")
991
- sentiment_output = gr.Label(label="Sentimental Analysis")
992
  with gr.Row():
993
- image_output = gr.Image(label="Generated Equirectangular Image")
994
- image_360_output = gr.File(label="Download 360 Image", type="filepath")
995
  with gr.Row():
996
- audio_output = gr.Audio(label="Generated Music")
997
  gr.HTML("<hr style='margin: 20px 0; border: 1px solid #ccc;'>")
998
 
999
  group_components.append(chunk_group)
@@ -1006,37 +1000,16 @@ and semantic parameters of spoken language to generate immersive 360° virtual e
1006
  'music': audio_output
1007
  })
1008
 
1009
- # Enhanced Download 360 Viewer Section
1010
  with gr.Group(visible=True, elem_classes="download-section") as download_group:
1011
- gr.Markdown("""
1012
-
1013
-
1014
-
1015
- """)
1016
-
1017
- # Enhanced download button
1018
  viewer_html_output = gr.File(
1019
- label=" Once processing is complete, download your AVE from here 🚀",
1020
  type="filepath",
1021
  interactive=False,
1022
  elem_classes="download-button"
1023
  )
1024
 
1025
- # Add a hidden HTML component for JavaScript execution
1026
  js_output = gr.HTML(visible=False)
1027
 
1028
- # Function to handle example selection
1029
- def load_example(example_name):
1030
- if not example_name:
1031
- return None, None
1032
-
1033
- # Get the path to the example audio file
1034
- example_path = load_example_audio(example_name)
1035
-
1036
- # Return the example path to update the audio component
1037
- return example_path, example_name
1038
-
1039
- # Set up the button clicks
1040
  process_btn.click(
1041
  fn=process_and_display,
1042
  inputs=[audio_input, generate_audio_checkbox, chunk_duration_input],
@@ -1063,14 +1036,4 @@ and semantic parameters of spoken language to generate immersive 360° virtual e
1063
  ]] + [loading_indicator, chunk_duration_input, viewer_html_output, js_output]
1064
  )
1065
 
1066
- #load_example_btn.click(
1067
- # fn=load_example,
1068
- # inputs=[example_selector],
1069
- # outputs=[audio_input, example_selector]
1070
- #)
1071
-
1072
- # Check if we're running on Hugging Face Spaces
1073
- is_spaces = os.getenv('SPACE_ID') is not None
1074
-
1075
- # Launch with appropriate settings
1076
- interface.launch(share=True) # Only share when not on Spaces
 
907
  }
908
  """
909
  # Create the Gradio interface with proper output handling
910
+ with gr.Blocks(title="Entornos Virtuales Afectivos - Procesamiento por Segmentos", css=custom_css) as interface:
911
  gr.Markdown("# Bello")
912
  gr.Markdown(
913
  """
914
+ ***Bello*** explora las sutilezas afectivas de la voz humana.
915
+ Usando técnicas multimodales de reconocimiento de emociones en el habla, el proyecto analiza parámetros acústicos, prosódicos
916
+ y semánticos del lenguaje hablado para generar entornos virtuales inmersivos en 360°.
917
 
918
+ ### Cómo interactuar
919
 
920
+ 1. Graba tu voz diciendo lo que quieras: un poema, una canción, una historia, o cualquier frase que se te ocurra.
921
+ 2. Establece la duración de cada segmento para dividir tu grabación en trozos.
922
+ 3. Marca la casilla si quieres generar audio para cada segmento.
923
+ 4. Genera tu Entorno Virtual Afectivo y espera los resultados.
924
+ 5. Descarga el archivo HTML.
925
+ 6. Abre tu creación con cualquier navegador web.
926
 
927
  ---
928
+ **Más información:**
929
+ • Video Tutorial: [Cómo usar este espacio](https://youtu.be/eVD1lzwVhi8)
930
 
931
+ Para más detalles del proyecto, visita: [www.emotional-machines.com](https://www.emotional-machines.com)
932
  """
933
  )
934
 
935
 
936
  with gr.Row():
937
  with gr.Column(scale=2):
938
+ audio_input = gr.Audio(label="Audio de Entrada", type="filepath", sources=["microphone", "upload"])
939
 
940
+ # Ejemplos de audio (opcional)
941
+ # example_selector = gr.Dropdown(
942
+ # label="Seleccionar Audio de Ejemplo",
943
+ # choices=["Discurso Feliz", "Historia Triste", "Noticias Neutrales"],
944
+ # value=None,
945
+ # info="Elige entre audios pregrabados de ejemplo"
946
+ # )
947
 
948
+ #load_example_btn = gr.Button("Cargar Ejemplo", variant="secondary")
 
949
 
950
  with gr.Column(scale=1):
 
951
  chunk_duration_input = gr.Number(
952
+ label="Duración de Segmento (segundos)",
953
  value=10,
954
  minimum=1,
955
  maximum=60,
956
  step=1,
957
+ info="Duración de cada segmento de audio a procesar (1-60 segundos)"
958
  )
 
959
  generate_audio_checkbox = gr.Checkbox(
960
+ label="Generar Audio (puede tardar más)",
961
  value=False,
962
+ info="Desmarca para omitir la generación de música y acelerar el procesamiento"
963
  )
964
  with gr.Row():
965
+ process_btn = gr.Button("Generar", variant="primary")
966
+ clear_btn = gr.Button("Borrar Todo", variant="secondary")
967
 
 
968
  loading_indicator = gr.HTML("""
969
  <div id="loading" style="display: none; text-align: center; margin: 20px;">
970
+ <p style="font-size: 18px; color: #4a4a4a;">Procesando segmentos de audio...</p>
971
  <div style="border: 4px solid #f3f3f3; border-top: 4px solid #3498db; border-radius: 50%; width: 30px; height: 30px; animation: spin 2s linear infinite; margin: 0 auto;"></div>
972
  <style>@keyframes spin { 0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); } }</style>
973
  </div>
974
  """)
975
 
 
976
  output_containers = []
977
+ group_components = [] # Contenedores de grupos
978
 
 
979
  for i in range(20):
980
  with gr.Group(visible=False) as chunk_group:
981
+ gr.Markdown(f"### Resultados del Segmento {i+1}")
982
  with gr.Row():
983
+ emotion_output = gr.Label(label="Predicción de Emoción Acústica")
984
+ transcription_output = gr.Label(label="Texto Transcrito")
985
+ sentiment_output = gr.Label(label="Análisis Sentimental")
986
  with gr.Row():
987
+ image_output = gr.Image(label="Imagen Equirectangular Generada")
988
+ image_360_output = gr.File(label="Descargar Imagen 360", type="filepath")
989
  with gr.Row():
990
+ audio_output = gr.Audio(label="Música Generada")
991
  gr.HTML("<hr style='margin: 20px 0; border: 1px solid #ccc;'>")
992
 
993
  group_components.append(chunk_group)
 
1000
  'music': audio_output
1001
  })
1002
 
 
1003
  with gr.Group(visible=True, elem_classes="download-section") as download_group:
 
 
 
 
 
 
 
1004
  viewer_html_output = gr.File(
1005
+ label="Una vez finalizado el procesamiento, descarga tu EVA aquí 🚀",
1006
  type="filepath",
1007
  interactive=False,
1008
  elem_classes="download-button"
1009
  )
1010
 
 
1011
  js_output = gr.HTML(visible=False)
1012
 
 
 
 
 
 
 
 
 
 
 
 
 
1013
  process_btn.click(
1014
  fn=process_and_display,
1015
  inputs=[audio_input, generate_audio_checkbox, chunk_duration_input],
 
1036
  ]] + [loading_indicator, chunk_duration_input, viewer_html_output, js_output]
1037
  )
1038
 
1039
+ interface.launch(share=True)