Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -576,8 +576,18 @@ def audio_transcription_tool(file_path: str) -> str:
|
|
| 576 |
return f"Error: Audio file not found: '{file_path}'"
|
| 577 |
|
| 578 |
try:
|
| 579 |
-
transcription = asr_pipeline(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 580 |
result_text = transcription.get("text", "")
|
|
|
|
|
|
|
|
|
|
| 581 |
|
| 582 |
if not result_text:
|
| 583 |
return "Error: Transcription empty."
|
|
@@ -672,110 +682,94 @@ class YoutubeInput(BaseModel):
|
|
| 672 |
@tool(args_schema=YoutubeInput)
|
| 673 |
def get_youtube_transcript(video_url: str) -> str:
|
| 674 |
"""
|
| 675 |
-
Fetches YouTube video transcript
|
| 676 |
-
|
| 677 |
"""
|
| 678 |
if not video_url:
|
| 679 |
return "Error: Invalid URL."
|
| 680 |
|
| 681 |
-
print(f"πΊ YouTube transcript: {video_url}")
|
| 682 |
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
video_id = None
|
| 688 |
-
if "watch?v=" in video_url:
|
| 689 |
-
video_id = video_url.split("v=")[1].split("&")[0]
|
| 690 |
-
elif "youtu.be/" in video_url:
|
| 691 |
-
video_id = video_url.split("youtu.be/")[1].split("?")[0]
|
| 692 |
-
|
| 693 |
-
if not video_id:
|
| 694 |
-
return f"Error: Could not extract video ID from URL."
|
| 695 |
-
|
| 696 |
-
cmd = [
|
| 697 |
-
'yt-dlp',
|
| 698 |
-
'--skip-download',
|
| 699 |
-
'--write-auto-subs',
|
| 700 |
-
'--write-subs',
|
| 701 |
-
'--sub-lang', 'en',
|
| 702 |
-
'--sub-format', 'vtt',
|
| 703 |
-
'--output', video_id,
|
| 704 |
-
video_url
|
| 705 |
-
]
|
| 706 |
-
|
| 707 |
-
print(f"π§ Running yt-dlp (attempt {attempt + 1}/{max_retries})...")
|
| 708 |
-
result = subprocess.run(cmd, capture_output=True, text=True, timeout=45)
|
| 709 |
-
|
| 710 |
-
if result.returncode != 0:
|
| 711 |
-
stderr = result.stderr
|
| 712 |
-
|
| 713 |
-
# Check for network errors
|
| 714 |
-
if 'Failed to resolve' in stderr or 'No address associated' in stderr:
|
| 715 |
-
if attempt < max_retries - 1:
|
| 716 |
-
print(f"β οΈ Network error, retrying...")
|
| 717 |
-
time.sleep(2 ** attempt)
|
| 718 |
-
continue
|
| 719 |
-
return "N/A - YouTube is inaccessible due to network issues."
|
| 720 |
-
|
| 721 |
-
return f"Error: Could not fetch subtitles - {stderr[:200]}"
|
| 722 |
-
|
| 723 |
-
# Find subtitle file
|
| 724 |
-
import glob
|
| 725 |
-
vtt_files = glob.glob(f"{video_id}*.vtt")
|
| 726 |
-
|
| 727 |
-
if not vtt_files:
|
| 728 |
-
return "N/A - No English subtitles found for this video."
|
| 729 |
-
|
| 730 |
-
subtitle_file = vtt_files[0]
|
| 731 |
-
print(f"β Found subtitle file: {subtitle_file}")
|
| 732 |
-
|
| 733 |
-
# Parse VTT
|
| 734 |
-
with open(subtitle_file, 'r', encoding='utf-8') as f:
|
| 735 |
-
content = f.read()
|
| 736 |
-
|
| 737 |
-
lines = content.split('\n')
|
| 738 |
-
transcript_parts = []
|
| 739 |
-
|
| 740 |
-
for line in lines:
|
| 741 |
-
line = line.strip()
|
| 742 |
-
if (line and
|
| 743 |
-
not line.startswith('WEBVTT') and
|
| 744 |
-
not '-->' in line and
|
| 745 |
-
not line.isdigit() and
|
| 746 |
-
not line.startswith('Kind:') and
|
| 747 |
-
not line.startswith('Language:')):
|
| 748 |
-
transcript_parts.append(line)
|
| 749 |
-
|
| 750 |
-
full_transcript = " ".join(transcript_parts)
|
| 751 |
-
|
| 752 |
-
# Cleanup
|
| 753 |
-
for vtt_file in vtt_files:
|
| 754 |
-
try:
|
| 755 |
-
os.remove(vtt_file)
|
| 756 |
-
except:
|
| 757 |
-
pass
|
| 758 |
-
|
| 759 |
-
if not full_transcript:
|
| 760 |
-
return "Error: Transcript was empty."
|
| 761 |
-
|
| 762 |
-
print(f"β Transcript extracted: {len(full_transcript)} chars")
|
| 763 |
-
return f"Transcript:\n{truncate_if_needed(full_transcript)}"
|
| 764 |
-
|
| 765 |
-
except subprocess.TimeoutExpired:
|
| 766 |
-
if attempt < max_retries - 1:
|
| 767 |
-
continue
|
| 768 |
-
return "N/A - YouTube request timed out."
|
| 769 |
-
except FileNotFoundError:
|
| 770 |
-
return "Error: yt-dlp not installed."
|
| 771 |
-
except Exception as e:
|
| 772 |
-
if attempt < max_retries - 1:
|
| 773 |
-
time.sleep(2 ** attempt)
|
| 774 |
-
continue
|
| 775 |
-
print(f"β Error: {str(e)}")
|
| 776 |
-
return f"Error: {str(e)}"
|
| 777 |
|
| 778 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 779 |
|
| 780 |
|
| 781 |
class ScrapeInput(BaseModel):
|
|
@@ -1448,7 +1442,7 @@ REMEMBER: One tool per turn. No reasoning without tools. Exact answer format.
|
|
| 1448 |
self.llm_with_tools = ChatGroq(
|
| 1449 |
temperature=0,
|
| 1450 |
groq_api_key=GROQ_API_KEY,
|
| 1451 |
-
model_name="
|
| 1452 |
max_tokens=4096,
|
| 1453 |
timeout=60
|
| 1454 |
).bind_tools(self.tools, tool_choice="auto")
|
|
|
|
| 576 |
return f"Error: Audio file not found: '{file_path}'"
|
| 577 |
|
| 578 |
try:
|
| 579 |
+
transcription = asr_pipeline(
|
| 580 |
+
str(audio_path),
|
| 581 |
+
return_timestamps=True, # β Add this!
|
| 582 |
+
chunk_length_s=30, # β Process in 30-second chunks
|
| 583 |
+
stride_length_s=5 # β 5-second overlap between chunks
|
| 584 |
+
)
|
| 585 |
+
|
| 586 |
+
# Extract just the text (ignore timestamps)
|
| 587 |
result_text = transcription.get("text", "")
|
| 588 |
+
# OR if you want to see the chunks:
|
| 589 |
+
# chunks = transcription.get("chunks", [])
|
| 590 |
+
# result_text = " ".join([chunk["text"] for chunk in chunks])
|
| 591 |
|
| 592 |
if not result_text:
|
| 593 |
return "Error: Transcription empty."
|
|
|
|
| 682 |
@tool(args_schema=YoutubeInput)
def get_youtube_transcript(video_url: str) -> str:
    """
    Fetch a YouTube video's transcript/captions via the YouTube Data API v3.

    Args:
        video_url: Full YouTube URL (``watch?v=``, ``youtu.be/``, ``shorts/``
            or ``embed/`` forms).

    Returns:
        ``"Transcript:\\n..."`` with the (possibly truncated) caption text on
        success, otherwise an ``"Error: ..."`` or ``"N/A - ..."`` string
        describing the failure.

    NOTE(review): ``captions().download()`` generally requires OAuth
    credentials for videos not owned by the API project; with an API key
    alone it commonly returns 403 for third-party videos — confirm this
    path works for the intended videos.
    """
    import re  # local import: only needed here, for stripping SRT markup

    if not video_url:
        return "Error: Invalid URL."

    print(f"📺 YouTube transcript (API v3): {video_url}")

    # The key must be configured in the environment (Space secrets).
    YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
    if not YOUTUBE_API_KEY:
        return "Error: YOUTUBE_API_KEY not set in Space secrets."

    try:
        # Extract the video ID from the common URL shapes.
        video_id = None
        for marker in ("watch?v=", "youtu.be/", "shorts/", "embed/"):
            if marker in video_url:
                tail = video_url.split(marker, 1)[1]
                # Cut at the first query/extra-parameter delimiter.
                video_id = tail.split("&")[0].split("?")[0].split("/")[0]
                break

        if not video_id:
            return "Error: Could not extract video ID from URL."

        print(f"   Video ID: {video_id}")

        # Build the Data API client (googleapiclient.discovery.build).
        youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)

        # List the caption tracks available for this video.
        captions_response = youtube.captions().list(
            part='snippet',
            videoId=video_id
        ).execute()

        if not captions_response.get('items'):
            return "N/A - No captions available for this video."

        # Prefer an English track; otherwise fall back to the first one.
        caption_id = None
        for caption in captions_response['items']:
            lang = caption['snippet'].get('language', '')
            if lang.startswith('en'):
                caption_id = caption['id']
                print(f"   Found English captions: {lang}")
                break

        if not caption_id:
            caption_id = captions_response['items'][0]['id']
            print(f"   Using first available caption track")

        # Download the chosen track as SRT (returned as bytes).
        caption_content = youtube.captions().download(
            id=caption_id,
            tfmt='srt'  # 'vtt' also supported by the API
        ).execute()

        # Parse SRT: drop cue numbers, timestamp lines and blanks, and
        # strip inline styling tags (<i>, <b>, <c>...) YouTube embeds.
        transcript_parts = []
        for line in caption_content.decode('utf-8').split('\n'):
            line = line.strip()
            if line and not line.isdigit() and '-->' not in line:
                line = re.sub(r'<[^>]+>', '', line).strip()
                if line:
                    transcript_parts.append(line)

        full_transcript = ' '.join(transcript_parts)

        if not full_transcript:
            return "Error: Transcript was empty."

        print(f"✅ Transcript retrieved: {len(full_transcript)} chars")
        return f"Transcript:\n{truncate_if_needed(full_transcript)}"

    except HttpError as e:
        # googleapiclient.errors.HttpError exposes the HTTP status code.
        if e.resp.status == 403:
            return "Error: YouTube API quota exceeded or captions are disabled for this video."
        elif e.resp.status == 404:
            return "Error: Video not found or captions not available."
        else:
            return f"YouTube API error: {str(e)}"
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        return f"Error: {str(e)}"
|
| 773 |
|
| 774 |
|
| 775 |
class ScrapeInput(BaseModel):
|
|
|
|
| 1442 |
self.llm_with_tools = ChatGroq(
|
| 1443 |
temperature=0,
|
| 1444 |
groq_api_key=GROQ_API_KEY,
|
| 1445 |
+
model_name="llama-3.3-70b-versatile",
|
| 1446 |
max_tokens=4096,
|
| 1447 |
timeout=60
|
| 1448 |
).bind_tools(self.tools, tool_choice="auto")
|