Updated the app and nodes to help debug the audio-streaming issue on Hugging Face. The full pipeline works locally in Docker, but there is still a slight issue on Hugging Face.
Files changed:
- app.py (+21 −0)
- hype_pack/utils/nodes.py (+26 −11)
app.py
CHANGED
|
@@ -1,11 +1,32 @@
|
|
| 1 |
"""Entry point that makes the bundled package importable, then launches the app."""
import os
import sys

# Ensure the package directory is on sys.path regardless of the
# working directory this script is launched from.
package_root = os.path.dirname(os.path.abspath(__file__))
sys.path.append(package_root)

# Hand control to the packaged Streamlit application.
from hype_pack.streamlit_app import main

if __name__ == "__main__":
    main()
|
|
|
|
| 1 |
"""Hugging Face Spaces entry point for the HypeCast Streamlit app.

Puts the bundled package on sys.path, configures the Streamlit page
(which must happen before any other Streamlit command), pre-seeds the
session state used for generated audio, then delegates to
``hype_pack.streamlit_app.main``.
"""
import os
import sys

import streamlit as st

# Make the bundled package importable when run from the Spaces root.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# st.set_page_config() must be the first Streamlit command in the session.
# Because it is called here, hype_pack/streamlit_app.py must NOT call it
# again — a second call raises StreamlitAPIException.
st.set_page_config(
    page_title="HypeCast Generator",
    page_icon="🎙️",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# Imported only after the page is configured (see note above).
from hype_pack.streamlit_app import main

if __name__ == "__main__":
    # NOTE(review): the previous version built a local dict of CORS headers
    # here ("Access-Control-Allow-Origin" etc.), but the dict was never
    # passed to anything, so it had no effect. CORS for audio streaming has
    # to be configured through Streamlit's server settings instead — e.g.
    # `server.enableCORS` in .streamlit/config.toml or the
    # `--server.enableCORS` CLI flag — not from Python code.

    # Pre-seed session state so downstream code can rely on the key existing.
    if 'audio_bytes' not in st.session_state:
        st.session_state.audio_bytes = None

    main()
|
hype_pack/utils/nodes.py
CHANGED
|
@@ -216,26 +216,41 @@ async def text_to_speech_node(interview_state: InterviewState, selected_speaker:
|
|
| 216 |
|
| 217 |
text_content = str(interview_state.transcript.content)
|
| 218 |
print(f"Converting text: {text_content[:100]}...")
|
|
|
|
|
|
|
| 219 |
|
| 220 |
async with speech as s:
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
if result and isinstance(result, bytes):
|
| 232 |
interview_state.audio_bytes = result
|
| 233 |
-
print(f"
|
|
|
|
|
|
|
| 234 |
|
| 235 |
except Exception as e:
|
| 236 |
print(f"Error in text-to-speech conversion: {str(e)}")
|
| 237 |
logging.error(f"Error in text-to-speech conversion: {str(e)}")
|
| 238 |
interview_state.audio_bytes = None
|
|
|
|
|
|
|
| 239 |
|
| 240 |
return interview_state
|
| 241 |
|
|
|
|
| 216 |
|
| 217 |
text_content = str(interview_state.transcript.content)
|
| 218 |
print(f"Converting text: {text_content[:100]}...")
|
| 219 |
+
print(f"Using voice_id: {voice_id}")
|
| 220 |
+
print(f"LMNT API Key present: {'Yes' if os.getenv('LMNT_API_KEY') else 'No'}")
|
| 221 |
|
| 222 |
async with speech as s:
|
| 223 |
+
try:
|
| 224 |
+
result = await s.synthesize(
|
| 225 |
+
text=text_content,
|
| 226 |
+
voice=voice_id,
|
| 227 |
+
format='mp3',
|
| 228 |
+
sample_rate=24000,
|
| 229 |
+
speed=1.0
|
| 230 |
+
)
|
| 231 |
+
print(f"API Response received. Size: {len(result)} bytes")
|
| 232 |
+
print(f"Response type: {type(result)}")
|
| 233 |
+
|
| 234 |
+
if not result or len(result) < 100: # Assuming valid MP3 files are larger than 100 bytes
|
| 235 |
+
raise ValueError(f"Invalid audio response size: {len(result)} bytes")
|
| 236 |
+
|
| 237 |
+
except Exception as api_error:
|
| 238 |
+
print(f"LMNT API Error: {str(api_error)}")
|
| 239 |
+
raise
|
| 240 |
+
|
| 241 |
+
# Store bytes directly in interview_state
|
| 242 |
if result and isinstance(result, bytes):
|
| 243 |
interview_state.audio_bytes = result
|
| 244 |
+
print(f"Successfully stored {len(result)} bytes in interview_state")
|
| 245 |
+
else:
|
| 246 |
+
raise ValueError(f"Unexpected response type: {type(result)}")
|
| 247 |
|
| 248 |
except Exception as e:
|
| 249 |
print(f"Error in text-to-speech conversion: {str(e)}")
|
| 250 |
logging.error(f"Error in text-to-speech conversion: {str(e)}")
|
| 251 |
interview_state.audio_bytes = None
|
| 252 |
+
# Optionally, set an error message that can be displayed in the UI
|
| 253 |
+
st.error(f"Failed to generate audio: {str(e)}")
|
| 254 |
|
| 255 |
return interview_state
|
| 256 |
|