Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -49,7 +49,7 @@ def transcribe_audio(path):
|
|
| 49 |
if not WHISPER_AVAILABLE:
|
| 50 |
return "β Whisper not available. Please install openai-whisper."
|
| 51 |
try:
|
| 52 |
-
model = whisper.load_model("tiny")
|
| 53 |
result = model.transcribe(path)
|
| 54 |
return result["text"]
|
| 55 |
except Exception as e:
|
|
@@ -109,6 +109,24 @@ def save_cookies(file):
|
|
| 109 |
print(f"β Failed to handle cookies.txt: {e}")
|
| 110 |
return None
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
# YouTube flow
|
| 113 |
def run_pipeline(url, cookies_file, show_transcript):
|
| 114 |
try:
|
|
@@ -122,7 +140,11 @@ def run_pipeline(url, cookies_file, show_transcript):
|
|
| 122 |
if not audio_path:
|
| 123 |
return status, ""
|
| 124 |
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
if transcript.startswith("β"):
|
| 127 |
return transcript, ""
|
| 128 |
|
|
@@ -137,6 +159,7 @@ def run_pipeline(url, cookies_file, show_transcript):
|
|
| 137 |
print(tb)
|
| 138 |
return f"β Unhandled Error:\n{tb}", ""
|
| 139 |
|
|
|
|
| 140 |
# Audio upload flow
|
| 141 |
def run_pipeline_audio(audio_file, show_transcript):
|
| 142 |
try:
|
|
@@ -145,6 +168,7 @@ def run_pipeline_audio(audio_file, show_transcript):
|
|
| 145 |
if audio_file is None:
|
| 146 |
return "β No audio file uploaded", ""
|
| 147 |
|
|
|
|
| 148 |
temp_audio_path = tempfile.mktemp(suffix=os.path.splitext(str(audio_file))[-1])
|
| 149 |
if hasattr(audio_file, "read"):
|
| 150 |
with open(temp_audio_path, "wb") as f:
|
|
@@ -152,7 +176,11 @@ def run_pipeline_audio(audio_file, show_transcript):
|
|
| 152 |
else:
|
| 153 |
shutil.copy(str(audio_file), temp_audio_path)
|
| 154 |
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
if transcript.startswith("β"):
|
| 157 |
return transcript, ""
|
| 158 |
|
|
@@ -167,6 +195,7 @@ def run_pipeline_audio(audio_file, show_transcript):
|
|
| 167 |
print(tb)
|
| 168 |
return f"β Unhandled Error:\n{tb}", ""
|
| 169 |
|
|
|
|
| 170 |
# Gradio UI
|
| 171 |
with gr.Blocks(title="Stock Insights from YouTube or Audio") as demo:
|
| 172 |
gr.Markdown("""
|
|
|
|
| 49 |
if not WHISPER_AVAILABLE:
|
| 50 |
return "β Whisper not available. Please install openai-whisper."
|
| 51 |
try:
|
| 52 |
+
model = whisper.load_model("tiny.en")
|
| 53 |
result = model.transcribe(path)
|
| 54 |
return result["text"]
|
| 55 |
except Exception as e:
|
|
|
|
| 109 |
print(f"β Failed to handle cookies.txt: {e}")
|
| 110 |
return None
|
| 111 |
|
| 112 |
+
|
| 113 |
+
# ✅ Trim audio to shorter length (2 minutes) for CPU speed
|
| 114 |
+
import subprocess
|
| 115 |
+
|
| 116 |
+
def trim_audio(input_path, output_path, duration_sec=120):
    """Write the first ``duration_sec`` seconds of ``input_path`` to ``output_path``.

    Runs ``ffmpeg`` with ``-c copy`` (streams are copied, not re-encoded) and
    ``-y`` (an existing ``output_path`` is overwritten).

    Args:
        input_path: Path of the audio file to trim.
        output_path: Path the trimmed audio is written to.
        duration_sec: Maximum length of the output, in seconds.

    Returns:
        ``output_path`` when ffmpeg exits successfully; ``input_path`` when
        ffmpeg cannot be started or exits with a non-zero status, so callers
        that use the return value can fall back to the untrimmed file.
    """
    command = [
        "ffmpeg", "-y", "-i", input_path,
        "-t", str(duration_sec),  # duration in seconds
        "-c", "copy", output_path,
    ]
    try:
        # check=True turns a non-zero ffmpeg exit into CalledProcessError;
        # without it a failed trim was silently reported as a success.
        subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except Exception as e:
        # Best-effort: log and fall back to the original file instead of raising.
        print("β Error trimming audio:", e)
        captured = getattr(e, "stderr", None)
        if captured:
            # Surface ffmpeg's own diagnostics; they explain why the trim failed.
            print(captured.decode(errors="replace").strip())
        return input_path
    return output_path
|
| 128 |
+
|
| 129 |
+
|
| 130 |
# YouTube flow
|
| 131 |
def run_pipeline(url, cookies_file, show_transcript):
|
| 132 |
try:
|
|
|
|
| 140 |
if not audio_path:
|
| 141 |
return status, ""
|
| 142 |
|
| 143 |
+
# ⏱ Trim audio to 2 minutes before transcription
|
| 144 |
+
trimmed_path = tempfile.mktemp(suffix=".mp3")
|
| 145 |
+
trim_audio(audio_path, trimmed_path)
|
| 146 |
+
|
| 147 |
+
transcript = transcribe_audio(trimmed_path)
|
| 148 |
if transcript.startswith("β"):
|
| 149 |
return transcript, ""
|
| 150 |
|
|
|
|
| 159 |
print(tb)
|
| 160 |
return f"β Unhandled Error:\n{tb}", ""
|
| 161 |
|
| 162 |
+
|
| 163 |
# Audio upload flow
|
| 164 |
def run_pipeline_audio(audio_file, show_transcript):
|
| 165 |
try:
|
|
|
|
| 168 |
if audio_file is None:
|
| 169 |
return "β No audio file uploaded", ""
|
| 170 |
|
| 171 |
+
# Save uploaded file
|
| 172 |
temp_audio_path = tempfile.mktemp(suffix=os.path.splitext(str(audio_file))[-1])
|
| 173 |
if hasattr(audio_file, "read"):
|
| 174 |
with open(temp_audio_path, "wb") as f:
|
|
|
|
| 176 |
else:
|
| 177 |
shutil.copy(str(audio_file), temp_audio_path)
|
| 178 |
|
| 179 |
+
# ⏱ Trim audio to 2 minutes
|
| 180 |
+
trimmed_path = tempfile.mktemp(suffix=".mp3")
|
| 181 |
+
trim_audio(temp_audio_path, trimmed_path)
|
| 182 |
+
|
| 183 |
+
transcript = transcribe_audio(trimmed_path)
|
| 184 |
if transcript.startswith("β"):
|
| 185 |
return transcript, ""
|
| 186 |
|
|
|
|
| 195 |
print(tb)
|
| 196 |
return f"β Unhandled Error:\n{tb}", ""
|
| 197 |
|
| 198 |
+
|
| 199 |
# Gradio UI
|
| 200 |
with gr.Blocks(title="Stock Insights from YouTube or Audio") as demo:
|
| 201 |
gr.Markdown("""
|