Updated the app and nodes to help debug the audio-streaming issue on Hugging Face. The full pipeline works locally in Docker, but there is still a slight issue on Hugging Face.
Files changed:
- app.py (+21 −0)
- hype_pack/utils/nodes.py (+26 −11)
app.py
CHANGED
|
@@ -1,11 +1,32 @@
|
|
| 1 |
"""Entry point that makes the bundled package importable, then launches the app."""
import os
import sys

# Ensure the package directory is on sys.path regardless of the
# working directory this script is launched from.
package_root = os.path.dirname(os.path.abspath(__file__))
sys.path.append(package_root)

# Hand control to the packaged Streamlit application.
from hype_pack.streamlit_app import main

if __name__ == "__main__":
    main()
|
|
|
|
| 1 |
"""Hugging Face Spaces entry point for the HypeCast Streamlit app.

Puts the bundled package on sys.path, configures the Streamlit page
(which must happen before any other Streamlit command), pre-seeds the
session state used for generated audio, then delegates to
``hype_pack.streamlit_app.main``.
"""
import os
import sys

import streamlit as st

# Make the bundled package importable when run from the Spaces root.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# st.set_page_config() must be the first Streamlit command in the session.
# Because it is called here, hype_pack/streamlit_app.py must NOT call it
# again — a second call raises StreamlitAPIException.
st.set_page_config(
    page_title="HypeCast Generator",
    page_icon="🎙️",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# Imported only after the page is configured (see note above).
from hype_pack.streamlit_app import main

if __name__ == "__main__":
    # NOTE(review): the previous version built a local dict of CORS headers
    # here ("Access-Control-Allow-Origin" etc.), but the dict was never
    # passed to anything, so it had no effect. CORS for audio streaming has
    # to be configured through Streamlit's server settings instead — e.g.
    # `server.enableCORS` in .streamlit/config.toml or the
    # `--server.enableCORS` CLI flag — not from Python code.

    # Pre-seed session state so downstream code can rely on the key existing.
    if 'audio_bytes' not in st.session_state:
        st.session_state.audio_bytes = None

    main()
|
hype_pack/utils/nodes.py
CHANGED
|
@@ -216,26 +216,41 @@ async def text_to_speech_node(interview_state: InterviewState, selected_speaker:
|
|
| 216 |
|
| 217 |
text_content = str(interview_state.transcript.content)
|
| 218 |
print(f"Converting text: {text_content[:100]}...")
|
|
|
|
|
|
|
| 219 |
|
| 220 |
async with speech as s:
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
if result and isinstance(result, bytes):
|
| 232 |
interview_state.audio_bytes = result
|
| 233 |
-
print(f"
|
|
|
|
|
|
|
| 234 |
|
| 235 |
except Exception as e:
|
| 236 |
print(f"Error in text-to-speech conversion: {str(e)}")
|
| 237 |
logging.error(f"Error in text-to-speech conversion: {str(e)}")
|
| 238 |
interview_state.audio_bytes = None
|
|
|
|
|
|
|
| 239 |
|
| 240 |
return interview_state
|
| 241 |
|
|
|
|
| 216 |
|
| 217 |
text_content = str(interview_state.transcript.content)
|
| 218 |
print(f"Converting text: {text_content[:100]}...")
|
| 219 |
+
print(f"Using voice_id: {voice_id}")
|
| 220 |
+
print(f"LMNT API Key present: {'Yes' if os.getenv('LMNT_API_KEY') else 'No'}")
|
| 221 |
|
| 222 |
async with speech as s:
|
| 223 |
+
try:
|
| 224 |
+
result = await s.synthesize(
|
| 225 |
+
text=text_content,
|
| 226 |
+
voice=voice_id,
|
| 227 |
+
format='mp3',
|
| 228 |
+
sample_rate=24000,
|
| 229 |
+
speed=1.0
|
| 230 |
+
)
|
| 231 |
+
print(f"API Response received. Size: {len(result)} bytes")
|
| 232 |
+
print(f"Response type: {type(result)}")
|
| 233 |
+
|
| 234 |
+
if not result or len(result) < 100: # Assuming valid MP3 files are larger than 100 bytes
|
| 235 |
+
raise ValueError(f"Invalid audio response size: {len(result)} bytes")
|
| 236 |
+
|
| 237 |
+
except Exception as api_error:
|
| 238 |
+
print(f"LMNT API Error: {str(api_error)}")
|
| 239 |
+
raise
|
| 240 |
+
|
| 241 |
+
# Store bytes directly in interview_state
|
| 242 |
if result and isinstance(result, bytes):
|
| 243 |
interview_state.audio_bytes = result
|
| 244 |
+
print(f"Successfully stored {len(result)} bytes in interview_state")
|
| 245 |
+
else:
|
| 246 |
+
raise ValueError(f"Unexpected response type: {type(result)}")
|
| 247 |
|
| 248 |
except Exception as e:
|
| 249 |
print(f"Error in text-to-speech conversion: {str(e)}")
|
| 250 |
logging.error(f"Error in text-to-speech conversion: {str(e)}")
|
| 251 |
interview_state.audio_bytes = None
|
| 252 |
+
# Optionally, set an error message that can be displayed in the UI
|
| 253 |
+
st.error(f"Failed to generate audio: {str(e)}")
|
| 254 |
|
| 255 |
return interview_state
|
| 256 |
|