Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -421,58 +421,151 @@ def validate_youtube_url(url):
|
|
| 421 |
return False, "Invalid YouTube URL format"
|
| 422 |
|
| 423 |
def process_video(url, cookies_file, progress=gr.Progress()):
|
| 424 |
-
"""Main function to process YouTube video"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
|
| 426 |
# Check if required packages are available
|
| 427 |
if not YT_DLP_AVAILABLE:
|
| 428 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 429 |
|
| 430 |
if not WHISPER_AVAILABLE:
|
| 431 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 432 |
|
| 433 |
# Validate URL
|
| 434 |
is_valid, validation_msg = validate_youtube_url(url)
|
| 435 |
if not is_valid:
|
| 436 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 437 |
|
| 438 |
audio_path = None
|
| 439 |
cookies_temp_path = None
|
| 440 |
|
| 441 |
try:
|
| 442 |
progress(0.05, desc="π Validating URL...")
|
|
|
|
| 443 |
|
| 444 |
# Process cookies file if provided
|
| 445 |
progress(0.1, desc="πͺ Processing cookies...")
|
| 446 |
cookies_temp_path = process_cookies_file(cookies_file)
|
| 447 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 448 |
status_msg = "β
Cookies loaded" if cookies_temp_path else "β οΈ No cookies (may encounter restrictions)"
|
| 449 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 450 |
# Download audio
|
| 451 |
progress(0.2, desc="π₯ Downloading audio...")
|
|
|
|
| 452 |
audio_path = download_audio(url, cookies_temp_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 453 |
|
| 454 |
# Transcribe audio
|
| 455 |
progress(0.6, desc="ποΈ Transcribing audio...")
|
|
|
|
| 456 |
transcript = transcribe_audio(audio_path)
|
|
|
|
| 457 |
|
| 458 |
if not transcript.strip():
|
| 459 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
|
| 461 |
# Extract stock information
|
| 462 |
progress(0.9, desc="π Analyzing content...")
|
|
|
|
| 463 |
stock_details = extract_stock_info_enhanced(transcript)
|
|
|
|
| 464 |
|
| 465 |
progress(1.0, desc="β
Complete!")
|
| 466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
|
| 468 |
except Exception as e:
|
| 469 |
-
error_msg =
|
| 470 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
|
| 472 |
finally:
|
| 473 |
# Clean up temporary files
|
| 474 |
-
|
| 475 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
|
| 477 |
# Create Gradio interface optimized for Gradio Cloud
|
| 478 |
with gr.Blocks(
|
|
@@ -545,9 +638,23 @@ with gr.Blocks(
|
|
| 545 |
with gr.Group():
|
| 546 |
gr.Markdown("### π₯ Input")
|
| 547 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 548 |
# Cookies upload with better instructions
|
| 549 |
cookies_input = gr.File(
|
| 550 |
-
label="πͺ Upload Cookies File (cookies.txt) -
|
| 551 |
file_types=[".txt"],
|
| 552 |
file_count="single"
|
| 553 |
)
|
|
@@ -588,7 +695,7 @@ with gr.Blocks(
|
|
| 588 |
# Status display
|
| 589 |
status_output = gr.Textbox(
|
| 590 |
label="π Status",
|
| 591 |
-
lines=
|
| 592 |
interactive=False,
|
| 593 |
info="Current processing status"
|
| 594 |
)
|
|
@@ -673,11 +780,62 @@ with gr.Blocks(
|
|
| 673 |
status = check_requirements()
|
| 674 |
return gr.update(value=status, visible=True)
|
| 675 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 676 |
check_req_btn.click(
|
| 677 |
fn=show_requirements,
|
| 678 |
outputs=[requirements_output]
|
| 679 |
)
|
| 680 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 681 |
process_btn.click(
|
| 682 |
fn=process_video,
|
| 683 |
inputs=[url_input, cookies_input],
|
|
|
|
| 421 |
return False, "Invalid YouTube URL format"
|
| 422 |
|
| 423 |
def process_video(url, cookies_file, progress=gr.Progress()):
    """Download a YouTube video's audio, transcribe it, and analyse the text.

    The function records every step it completes in a debug log and embeds
    that log in whatever text it returns, so failures can be diagnosed from
    the UI alone.

    Args:
        url: Video URL; checked with ``validate_youtube_url`` before use.
        cookies_file: Value handed to ``process_cookies_file``; a falsy
            result from that helper is treated as "no cookies supplied".
        progress: Gradio progress callback, invoked as
            ``progress(fraction, desc=...)``.

    Returns:
        A 3-tuple ``(text, stock_details, status)``. On success ``text`` is
        the transcript followed by the debug log and ``stock_details`` is the
        result of ``extract_stock_info_enhanced``. On any failure ``text`` is
        a multi-line error report, ``stock_details`` is ``""`` and ``status``
        is a short line starting with the cross-mark emoji.
    """
    # NOTE(review): the emoji in these messages were reconstructed from a
    # mis-decoded copy of this file - confirm them against the original app.py.
    debug_info = [
        f"🚀 Starting process at {time.strftime('%H:%M:%S')}",
        f"💡 Python version: {sys.version.split()[0]}",
        f"📦 yt-dlp available: {YT_DLP_AVAILABLE}",
        f"🎙️ Whisper available: {WHISPER_AVAILABLE} (type: {WHISPER_TYPE})",
    ]

    # Guard clauses: bail out early when a dependency is missing or the URL
    # is unusable, before any temporary file has been created.
    if not YT_DLP_AVAILABLE:
        error_msg = (
            "❌ ERROR: yt-dlp is not installed properly.\n\n"
            "SOLUTION: Install yt-dlp using:\n"
            "pip install yt-dlp\n\n"
            "DEBUG INFO:\n" + "\n".join(debug_info)
        )
        return error_msg, "", "❌ Missing yt-dlp"

    if not WHISPER_AVAILABLE:
        error_msg = (
            "❌ ERROR: OpenAI Whisper is not installed properly.\n\n"
            "SOLUTION: Install Whisper using:\n"
            "pip install openai-whisper\n"
            "OR\n"
            "pip install transformers torch torchaudio\n\n"
            "DEBUG INFO:\n" + "\n".join(debug_info)
        )
        return error_msg, "", "❌ Missing Whisper"

    is_valid, validation_msg = validate_youtube_url(url)
    if not is_valid:
        error_msg = (
            f"❌ ERROR: {validation_msg}\n\n"
            f"PROVIDED URL: {url}\n\n"
            "VALID URL FORMATS:\n"
            "• https://www.youtube.com/watch?v=VIDEO_ID\n"
            "• https://youtu.be/VIDEO_ID\n"
            "• https://www.youtube.com/embed/VIDEO_ID\n\n"
            "DEBUG INFO:\n" + "\n".join(debug_info)
        )
        return error_msg, "", "❌ Invalid URL"

    # Both paths start as None so the ``finally`` clause knows what exists.
    audio_path = None
    cookies_temp_path = None

    try:
        progress(0.05, desc="🔍 Validating URL...")
        debug_info.append(f"✅ URL validation passed: {url}")

        progress(0.1, desc="🍪 Processing cookies...")
        cookies_temp_path = process_cookies_file(cookies_file)
        if cookies_temp_path:
            debug_info.append(f"✅ Cookies processed: {cookies_temp_path}")
        else:
            debug_info.append("⚠️ No cookies provided - this may cause access errors")

        # The accessibility probe is purely diagnostic: whatever happens is
        # logged once and the download is attempted regardless.
        progress(0.15, desc="🔍 Checking video accessibility...")
        try:
            video_info = get_video_info(url, cookies_temp_path)
            if 'error' in video_info:
                debug_info.append(f"❌ Video info error: {video_info['error']}")
            else:
                debug_info.append(f"✅ Video info: {video_info}")
        except Exception as probe_error:
            debug_info.append(f"❌ Video info check failed: {str(probe_error)}")

        progress(0.2, desc="📥 Downloading audio...")
        debug_info.append("🔄 Starting audio download...")
        audio_path = download_audio(url, cookies_temp_path)
        debug_info.append(f"✅ Audio downloaded: {audio_path}")

        # Fail fast if the downloader reported a path that is not on disk.
        if not (audio_path and os.path.exists(audio_path)):
            raise Exception("Audio file not found after download")
        file_size = os.path.getsize(audio_path)
        debug_info.append(f"📊 Audio file size: {file_size/1024/1024:.2f} MB")

        progress(0.6, desc="🎙️ Transcribing audio...")
        debug_info.append("🔄 Starting transcription...")
        # Treat a None result like an empty transcript so it is reported as
        # "no speech" rather than crashing on len()/strip() below.
        transcript = transcribe_audio(audio_path) or ""
        debug_info.append(f"✅ Transcription completed: {len(transcript)} characters")

        if not transcript.strip():
            error_msg = (
                "❌ ERROR: No speech detected in the video\n\n"
                "POSSIBLE CAUSES:\n"
                "• Video has no audio track\n"
                "• Audio is too quiet or unclear\n"
                "• Video is not in English\n"
                "• Audio file is corrupted\n\n"
                "DEBUG INFO:\n" + "\n".join(debug_info)
            )
            return error_msg, "", "❌ No speech detected"

        progress(0.9, desc="📊 Analyzing content...")
        debug_info.append("🔄 Starting stock analysis...")
        stock_details = extract_stock_info_enhanced(transcript)
        debug_info.append("✅ Stock analysis completed")

        progress(1.0, desc="✅ Complete!")

        # Append the step log below the transcript, under a ruled heading.
        rule = "=" * 50
        debug_section = (
            "\n\n" + rule + "\n"
            "🔍 DEBUG INFORMATION\n"
            + rule + "\n"
            + "\n".join(debug_info)
        )
        return transcript + debug_section, stock_details, "✅ Processing completed successfully"

    except Exception as e:
        # Top-level boundary for the UI: turn any failure into a readable
        # report instead of letting it propagate to Gradio.
        reason = str(e)
        reason_lower = reason.lower()
        report = [
            "❌ DETAILED ERROR INFORMATION:\n\n",
            f"ERROR MESSAGE: {reason}\n\n",
            f"ERROR TYPE: {type(e).__name__}\n\n",
        ]

        # Pick troubleshooting hints from keywords in the exception text.
        if "download" in reason_lower:
            report.append(
                "🔧 DOWNLOAD TROUBLESHOOTING:\n"
                "• Check if video URL is accessible in browser\n"
                "• Upload fresh cookies.txt file\n"
                "• Try a different video\n"
                "• Wait 10-15 minutes if rate limited\n\n"
            )
        elif "transcribe" in reason_lower:
            report.append(
                "🔧 TRANSCRIPTION TROUBLESHOOTING:\n"
                "• Check if audio file was downloaded properly\n"
                "• Ensure video has clear audio\n"
                "• Try a shorter video\n\n"
            )

        report.append("📋 PROCESSING STEPS COMPLETED:\n")
        report.append("\n".join(debug_info))

        return "".join(report), "", f"❌ Error: {type(e).__name__}"

    finally:
        # Clean up temporary files. Nothing is logged here: every return
        # value has already been built by the time this clause runs, so a
        # log entry could never reach the user.
        if audio_path:
            cleanup_file(audio_path)
        if cookies_temp_path:
            cleanup_file(cookies_temp_path)
|
| 569 |
|
| 570 |
# Create Gradio interface optimized for Gradio Cloud
|
| 571 |
with gr.Blocks(
|
|
|
|
| 638 |
with gr.Group():
|
| 639 |
gr.Markdown("### π₯ Input")
|
| 640 |
|
| 641 |
+
# Add a test button first
|
| 642 |
+
test_btn = gr.Button(
|
| 643 |
+
"π§ͺ Test System (Click First!)",
|
| 644 |
+
variant="secondary",
|
| 645 |
+
size="sm"
|
| 646 |
+
)
|
| 647 |
+
|
| 648 |
+
test_output = gr.Textbox(
|
| 649 |
+
label="π§ͺ System Test Results",
|
| 650 |
+
lines=5,
|
| 651 |
+
visible=False,
|
| 652 |
+
interactive=False
|
| 653 |
+
)
|
| 654 |
+
|
| 655 |
# Cookies upload with better instructions
|
| 656 |
cookies_input = gr.File(
|
| 657 |
+
label="πͺ Upload Cookies File (cookies.txt) - HIGHLY RECOMMENDED",
|
| 658 |
file_types=[".txt"],
|
| 659 |
file_count="single"
|
| 660 |
)
|
|
|
|
| 695 |
# Status display
|
| 696 |
status_output = gr.Textbox(
|
| 697 |
label="π Status",
|
| 698 |
+
lines=3,
|
| 699 |
interactive=False,
|
| 700 |
info="Current processing status"
|
| 701 |
)
|
|
|
|
| 780 |
status = check_requirements()
|
| 781 |
return gr.update(value=status, visible=True)
|
| 782 |
|
| 783 |
+
def test_system():
    """Run quick self-checks and return the report as a Gradio update.

    Checks, in order: the availability flags for yt-dlp and Whisper, whether
    a ``YoutubeDL`` object can be constructed, whether the Whisper backend
    named by ``WHISPER_TYPE`` can be imported, and whether a temporary file
    can be created and written.

    Returns:
        ``gr.update(value=<report text>, visible=True)``, where the report
        has one line per check, prefixed with a check mark or a cross.
    """
    # NOTE(review): the emoji in these messages were reconstructed from a
    # mis-decoded copy of this file - confirm them against the original app.py.
    def availability_line(label, available, suffix=""):
        # The icon must follow the flag; the footer below tells users to
        # look for the cross mark when something is missing.
        icon = "✅" if available else "❌"
        state = "Available" if available else "NOT AVAILABLE"
        return f"{icon} {label}: {state}{suffix}"

    test_results = [
        "🧪 SYSTEM TEST RESULTS",
        "=" * 30,
        availability_line("yt-dlp", YT_DLP_AVAILABLE),
        availability_line("Whisper", WHISPER_AVAILABLE, f" (Type: {WHISPER_TYPE})"),
    ]

    # Test yt-dlp functionality
    if YT_DLP_AVAILABLE:
        try:
            from yt_dlp import YoutubeDL

            # Context manager so the probe instance is closed again.
            with YoutubeDL({'quiet': True}):
                pass
            test_results.append("✅ yt-dlp: Can create YoutubeDL instance")
        except Exception as e:
            test_results.append(f"❌ yt-dlp: Error creating instance - {str(e)}")

    # Test Whisper functionality: the imports are the probe, the imported
    # names themselves are not used.
    if WHISPER_AVAILABLE:
        try:
            if WHISPER_TYPE == "openai-whisper":
                import whisper  # noqa: F401
                test_results.append("✅ Whisper: OpenAI Whisper can be imported")
            elif WHISPER_TYPE == "transformers":
                from transformers import pipeline  # noqa: F401
                test_results.append("✅ Whisper: Transformers Whisper can be imported")
        except Exception as e:
            test_results.append(f"❌ Whisper: Error testing - {str(e)}")

    # Test file operations. NamedTemporaryFile creates the file atomically
    # and removes it on close, unlike the deprecated, race-prone mktemp().
    try:
        with tempfile.NamedTemporaryFile(mode='w') as probe:
            probe.write("test")
            probe.flush()
        test_results.append("✅ File operations: Working")
    except Exception as e:
        test_results.append(f"❌ File operations: Error - {str(e)}")

    test_results.append("\n💡 If you see any ❌ errors above, install missing packages:")
    test_results.append("pip install yt-dlp openai-whisper torch torchaudio")

    return gr.update(value="\n".join(test_results), visible=True)
|
| 828 |
+
|
| 829 |
check_req_btn.click(
|
| 830 |
fn=show_requirements,
|
| 831 |
outputs=[requirements_output]
|
| 832 |
)
|
| 833 |
|
| 834 |
+
test_btn.click(
|
| 835 |
+
fn=test_system,
|
| 836 |
+
outputs=[test_output]
|
| 837 |
+
)
|
| 838 |
+
|
| 839 |
process_btn.click(
|
| 840 |
fn=process_video,
|
| 841 |
inputs=[url_input, cookies_input],
|