Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
-
# ✅ Combined YouTube Analyzer with Stock Info Extractor
|
| 2 |
-
# Based on your working app + whisper + stock extraction
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
import os
|
|
@@ -29,7 +28,6 @@ except ImportError:
|
|
| 29 |
|
| 30 |
def extract_stock_info_simple(text):
|
| 31 |
try:
|
| 32 |
-
stock_info = []
|
| 33 |
companies = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text)
|
| 34 |
symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
|
| 35 |
prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
|
|
@@ -83,62 +81,84 @@ def transcribe_audio(file_path):
|
|
| 83 |
except Exception as e:
|
| 84 |
return "β Transcription failed", str(e)
|
| 85 |
|
| 86 |
-
#
|
| 87 |
|
| 88 |
def download_audio_youtube(url, cookies_file=None):
|
| 89 |
try:
|
| 90 |
temp_dir = tempfile.mkdtemp()
|
| 91 |
out_path = os.path.join(temp_dir, "audio")
|
| 92 |
-
|
|
|
|
| 93 |
'format': 'bestaudio[ext=m4a]/bestaudio/best',
|
| 94 |
'outtmpl': out_path + '.%(ext)s',
|
| 95 |
'quiet': True,
|
| 96 |
'noplaylist': True,
|
| 97 |
-
'cookiefile': cookies_file if cookies_file else None,
|
| 98 |
'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
|
| 99 |
'referer': 'https://www.youtube.com/',
|
| 100 |
'force_ipv4': True,
|
| 101 |
-
'
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
}
|
|
|
|
| 112 |
with YoutubeDL(ydl_opts) as ydl:
|
| 113 |
ydl.download([url])
|
|
|
|
| 114 |
for ext in ['.m4a', '.mp3', '.webm']:
|
| 115 |
full_path = out_path + ext
|
| 116 |
if os.path.exists(full_path):
|
| 117 |
return full_path, "β
Audio downloaded"
|
|
|
|
| 118 |
return None, "β Audio file not found"
|
|
|
|
| 119 |
except Exception as e:
|
|
|
|
|
|
|
| 120 |
return None, f"β Download error: {str(e)}"
|
| 121 |
|
| 122 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
def full_pipeline(url, cookies):
|
| 125 |
if not url:
|
| 126 |
return "β Enter a valid YouTube URL", "", ""
|
| 127 |
-
|
| 128 |
temp_cookie = save_uploaded_cookie(cookies)
|
| 129 |
-
|
| 130 |
audio_path, msg = download_audio_youtube(url, temp_cookie)
|
| 131 |
if not audio_path:
|
| 132 |
return msg, "", ""
|
| 133 |
-
|
| 134 |
transcript, tmsg = transcribe_audio(audio_path)
|
| 135 |
if "β" in transcript:
|
| 136 |
return msg, transcript, tmsg
|
| 137 |
-
|
| 138 |
stock_data = extract_stock_info_simple(transcript)
|
| 139 |
return "β
Complete", transcript, stock_data
|
| 140 |
|
| 141 |
-
|
| 142 |
# Gradio App
|
| 143 |
with gr.Blocks(title="π Stock Info Extractor from YouTube") as demo:
|
| 144 |
gr.Markdown("""
|
|
@@ -148,7 +168,7 @@ with gr.Blocks(title="π Stock Info Extractor from YouTube") as demo:
|
|
| 148 |
|
| 149 |
with gr.Row():
|
| 150 |
url_input = gr.Textbox(label="YouTube URL")
|
| 151 |
-
cookies_input = gr.File(label="cookies.txt (
|
| 152 |
|
| 153 |
run_btn = gr.Button("π Run Extraction")
|
| 154 |
status = gr.Textbox(label="Status")
|
|
|
|
| 1 |
+
# ✅ Combined YouTube Analyzer with Stock Info Extractor (fixed download using working app logic)
|
|
|
|
| 2 |
|
| 3 |
import gradio as gr
|
| 4 |
import os
|
|
|
|
| 28 |
|
| 29 |
def extract_stock_info_simple(text):
|
| 30 |
try:
|
|
|
|
| 31 |
companies = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text)
|
| 32 |
symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
|
| 33 |
prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
|
|
|
|
| 81 |
except Exception as e:
|
| 82 |
return "β Transcription failed", str(e)
|
| 83 |
|
| 84 |
+
# ✅ Reused working download logic from other app
|
| 85 |
|
| 86 |
def download_audio_youtube(url, cookies_file=None):
|
| 87 |
try:
|
| 88 |
temp_dir = tempfile.mkdtemp()
|
| 89 |
out_path = os.path.join(temp_dir, "audio")
|
| 90 |
+
|
| 91 |
+
ydl_opts = {
|
| 92 |
'format': 'bestaudio[ext=m4a]/bestaudio/best',
|
| 93 |
'outtmpl': out_path + '.%(ext)s',
|
| 94 |
'quiet': True,
|
| 95 |
'noplaylist': True,
|
|
|
|
| 96 |
'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
|
| 97 |
'referer': 'https://www.youtube.com/',
|
| 98 |
'force_ipv4': True,
|
| 99 |
+
'extractor_retries': 3,
|
| 100 |
+
'fragment_retries': 3,
|
| 101 |
+
'retry_sleep_functions': {'http': lambda n: 2 ** n},
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
if cookies_file and os.path.exists(cookies_file):
|
| 105 |
+
ydl_opts['cookiefile'] = cookies_file
|
| 106 |
+
else:
|
| 107 |
+
print("β οΈ No cookies file provided")
|
| 108 |
+
|
| 109 |
+
ydl_opts['http_headers'] = {
|
| 110 |
+
'User-Agent': ydl_opts['user_agent'],
|
| 111 |
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
| 112 |
+
'Accept-Language': 'en-US,en;q=0.5',
|
| 113 |
+
'Accept-Encoding': 'gzip, deflate',
|
| 114 |
+
'DNT': '1',
|
| 115 |
+
'Connection': 'keep-alive',
|
| 116 |
+
'Upgrade-Insecure-Requests': '1',
|
| 117 |
+
'Referer': 'https://www.youtube.com/',
|
| 118 |
}
|
| 119 |
+
|
| 120 |
with YoutubeDL(ydl_opts) as ydl:
|
| 121 |
ydl.download([url])
|
| 122 |
+
|
| 123 |
for ext in ['.m4a', '.mp3', '.webm']:
|
| 124 |
full_path = out_path + ext
|
| 125 |
if os.path.exists(full_path):
|
| 126 |
return full_path, "β
Audio downloaded"
|
| 127 |
+
|
| 128 |
return None, "β Audio file not found"
|
| 129 |
+
|
| 130 |
except Exception as e:
|
| 131 |
+
import traceback
|
| 132 |
+
traceback.print_exc()
|
| 133 |
return None, f"β Download error: {str(e)}"
|
| 134 |
|
| 135 |
+
# Copy cookie to tmp
|
| 136 |
+
|
| 137 |
+
def save_uploaded_cookie(cookies):
|
| 138 |
+
if cookies is None:
|
| 139 |
+
return None
|
| 140 |
+
temp_cookie_path = tempfile.mktemp(suffix=".txt")
|
| 141 |
+
shutil.copy2(cookies.name, temp_cookie_path)
|
| 142 |
+
return temp_cookie_path
|
| 143 |
+
|
| 144 |
+
# Gradio app logic
|
| 145 |
|
| 146 |
def full_pipeline(url, cookies):
|
| 147 |
if not url:
|
| 148 |
return "β Enter a valid YouTube URL", "", ""
|
| 149 |
+
|
| 150 |
temp_cookie = save_uploaded_cookie(cookies)
|
|
|
|
| 151 |
audio_path, msg = download_audio_youtube(url, temp_cookie)
|
| 152 |
if not audio_path:
|
| 153 |
return msg, "", ""
|
| 154 |
+
|
| 155 |
transcript, tmsg = transcribe_audio(audio_path)
|
| 156 |
if "β" in transcript:
|
| 157 |
return msg, transcript, tmsg
|
| 158 |
+
|
| 159 |
stock_data = extract_stock_info_simple(transcript)
|
| 160 |
return "β
Complete", transcript, stock_data
|
| 161 |
|
|
|
|
| 162 |
# Gradio App
|
| 163 |
with gr.Blocks(title="π Stock Info Extractor from YouTube") as demo:
|
| 164 |
gr.Markdown("""
|
|
|
|
| 168 |
|
| 169 |
with gr.Row():
|
| 170 |
url_input = gr.Textbox(label="YouTube URL")
|
| 171 |
+
cookies_input = gr.File(label="cookies.txt (exported from YouTube tab)", file_types=[".txt"])
|
| 172 |
|
| 173 |
run_btn = gr.Button("π Run Extraction")
|
| 174 |
status = gr.Textbox(label="Status")
|