Spaces:

Amr-h
/

English_Dialect_Classifier

Sleeping

App Files Files Community

Amr-h committed on May 31, 2025

Commit

0ca3a79

1 Parent(s): d38e095

remove youtube

Browse files

Files changed (2) hide show

app.py +77 -92
audio_extractor.py +73 -298

app.py CHANGED Viewed

@@ -2,12 +2,10 @@ import streamlit as st
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
-from plotly.subplots import make_subplots
 import time
 import os
 from pathlib import Path
 import tempfile
-import shutil
 # Import your existing modules
 try:
@@ -19,7 +17,7 @@ except ImportError as e:
 # Page configuration
 st.set_page_config(
-    page_title="🎤 Accent Analyzer",
     page_icon="🎤",
     layout="wide",
     initial_sidebar_state="expanded"
@@ -35,12 +33,6 @@ st.markdown("""
         font-weight: bold;
         margin-bottom: 2rem;
     }
-    .metric-container {
-        background-color: #f0f2f6;
-        padding: 1rem;
-        border-radius: 0.5rem;
-        margin: 0.5rem 0;
-    }
     .success-box {
         background-color: #d4edda;
         border: 1px solid #c3e6cb;
@@ -74,8 +66,6 @@ def initialize_session_state():
         st.session_state.analysis_results = None
     if 'processing' not in st.session_state:
         st.session_state.processing = False
-    if 'uploaded_file_path' not in st.session_state:
-        st.session_state.uploaded_file_path = None
 def save_uploaded_file(uploaded_file):
     """Save uploaded file to temporary directory"""
@@ -90,31 +80,31 @@ def save_uploaded_file(uploaded_file):
         return None
 def create_confidence_chart(chunk_results):
-    """Create confidence score chart for chunks"""
     if not chunk_results:
         return None
     chunk_data = []
-    for result in chunk_results:
         chunk_data.append({
-            'Chunk': result['chunk'],
             'Confidence': result['confidence'],
             'Accent': result['accent'],
-            'Is Confident': '✓ Confident' if result['is_confident'] else '✗ Low Confidence'
         })
     df = pd.DataFrame(chunk_data)
     fig = px.bar(df,
-                 x='Chunk',
                  y='Confidence',
                  color='Is Confident',
                  hover_data=['Accent'],
-                 title='Confidence Scores by Chunk',
-                 color_discrete_map={'✓ Confident': '#28a745', '✗ Low Confidence': '#dc3545'})
     fig.update_layout(
-        xaxis_title="Chunk Number",
         yaxis_title="Confidence Score",
         showlegend=True,
         height=400
@@ -156,30 +146,30 @@ def display_results(results):
     with col1:
         st.metric(
-            label="🎯 Confidence Score",
-            value=f"{results['confidence_score']:.3f}",
-            delta=f"{results['confidence_percentage']}"
         )
     with col2:
         st.metric(
-            label="📊 Chunks Processed",
-            value=f"{results['processed_chunks_count']}/{results['available_chunks_count']}",
-            delta="Early stopped" if results.get('early_stopped', False) else "Complete"
         )
     with col3:
         st.metric(
-            label="✅ Confident Predictions",
             value=results['confident_chunks_count'],
-            delta=f"{(results['confident_chunks_count']/results['processed_chunks_count']*100):.1f}%"
         )
     with col4:
         st.metric(
             label="⏱️ Processing Time",
             value=f"{results['processing_time']:.1f}s",
-            delta=f"{results.get('duration_minutes', 0):.1f}min video"
         )
     # Detailed Analysis
@@ -198,49 +188,45 @@ def display_results(results):
     with chart_col2:
         confident_chart = create_accent_distribution_chart(
             results['confident_accent_counts'],
-            "Confident Predictions Distribution"
         )
         if confident_chart:
             st.plotly_chart(confident_chart, use_container_width=True)
-    # All predictions distribution
-    if results['all_accent_counts'] != results['confident_accent_counts']:
-        st.subheader("📊 All Predictions (Including Low Confidence)")
-        all_chart = create_accent_distribution_chart(
-            results['all_accent_counts'],
-            "All Predictions Distribution"
-        )
-        if all_chart:
-            st.plotly_chart(all_chart, use_container_width=True)
-    # Detailed chunk results table
-    with st.expander("🔍 View Detailed Chunk Results"):
-        chunk_df = pd.DataFrame(results['chunk_results'])
-        st.dataframe(chunk_df, use_container_width=True)
     # Summary statistics
     with st.expander("📋 Summary Statistics"):
         col1, col2 = st.columns(2)
         with col1:
-            st.write("**Confident Predictions:**")
-            for accent, count in results['confident_accent_counts'].items():
-                percentage = (count / results['confident_chunks_count']) * 100
-                st.write(f"• {accent}: {count} chunks ({percentage:.1f}%)")
         with col2:
             st.write("**All Predictions:**")
-            for accent, count in results['all_accent_counts'].items():
-                percentage = (count / results['processed_chunks_count']) * 100
-                st.write(f"• {accent}: {count} chunks ({percentage:.1f}%)")
 def main():
     """Main Streamlit application"""
     initialize_session_state()
     # Header
-    st.markdown('<h1 class="main-header">🎤 Accent Analyzer</h1>', unsafe_allow_html=True)
-    st.markdown("Analyze accents from video files, URLs, or audio sources using advanced AI models.")
     # Sidebar configuration
     st.sidebar.header("⚙️ Configuration")
@@ -251,13 +237,7 @@ def main():
         max_value=0.9,
         value=0.6,
         step=0.05,
-        help="Only predictions above this threshold are considered confident"
-    )
-    early_stopping = st.sidebar.checkbox(
-        "Enable Early Stopping",
-        value=True,
-        help="Stop processing when 3 consecutive confident predictions agree"
     )
     # Input section
@@ -265,30 +245,30 @@ def main():
     input_method = st.radio(
         "Choose input method:",
-        ["URL (YouTube, Loom, etc.)", "Upload File"],
         horizontal=True
     )
     source = None
-    if input_method == "URL (YouTube, Loom, etc.)":
         source = st.text_input(
             "Enter video URL:",
-            placeholder="https://www.youtube.com/watch?v=...",
-            help="Supports YouTube, Loom, and direct media URLs"
         )
         # URL examples
         with st.expander("🔗 Supported URL Examples"):
-            st.write("• YouTube: `https://www.youtube.com/watch?v=VIDEO_ID`")
-            st.write("• YouTube Shorts: `https://www.youtube.com/shorts/VIDEO_ID`")
-            st.write("• Loom: `https://www.loom.com/share/VIDEO_ID`")
-            st.write("• Direct media files: `https://example.com/video.mp4`")
     else:  # Upload File
         uploaded_file = st.file_uploader(
             "Choose a video or audio file",
-            type=['mp4', 'webm', 'avi', 'mov', 'mkv', 'm4v', '3gp', 'mp3', 'wav', 'm4a', 'aac', 'ogg', 'flac'],
             help="Upload video or audio files for accent analysis"
         )
@@ -296,16 +276,17 @@ def main():
             # Save uploaded file
             with st.spinner("Saving uploaded file..."):
                 source = save_uploaded_file(uploaded_file)
-                st.session_state.uploaded_file_path = source
             if source:
                 st.success(f"✅ File uploaded: {uploaded_file.name}")
             else:
                 st.error("❌ Failed to save uploaded file")
     # Analysis button
     analyze_button = st.button(
-        "🚀 Start Analysis",
         type="primary",
         disabled=not source or st.session_state.processing,
         use_container_width=True
@@ -321,15 +302,15 @@ def main():
         try:
             status_text.text("🎵 Extracting audio...")
-            progress_bar.progress(20)
-            status_text.text("🧠 Loading AI model...")
-            progress_bar.progress(40)
-            status_text.text("🔍 Analyzing accent...")
-            progress_bar.progress(60)
-            # Run analysis
             results = analyze_video_accent(source, confidence_threshold=confidence_threshold)
             progress_bar.progress(100)
@@ -353,37 +334,41 @@ def main():
     # Display results
     if st.session_state.analysis_results:
-        st.header("📊 Results")
         display_results(st.session_state.analysis_results)
     # Information section
     with st.expander("ℹ️ About This Tool"):
         st.markdown("""
-        **Accent Analyzer** uses advanced machine learning models to identify accents from speech in videos and audio files.
-        **Features:**
-        - Supports multiple input sources (URLs, file uploads)
-        - Smart chunking for efficient processing
-        - Confidence-based predictions
-        - Early stopping for faster results
-        - Detailed analysis with visualizations
         **Supported Formats:**
-        - **Video:** MP4, WebM, AVI, MOV, MKV, M4V, 3GP
         - **Audio:** MP3, WAV, M4A, AAC, OGG, FLAC
-        - **URLs:** YouTube, Loom, direct media links
         **How it works:**
-        1. Audio is extracted from the source
-        2. Audio is chunked into smaller segments
-        3. Each chunk is analyzed for accent features
-        4. Results are aggregated with confidence scoring
-        5. Final prediction is made based on confident predictions
         """)
     # Footer
     st.markdown("---")
-    st.markdown("Made with ❤️ using Streamlit and SpeechBrain")
 if __name__ == "__main__":
     main()

 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
 import time
 import os
 from pathlib import Path
 import tempfile
 # Import your existing modules
 try:
 # Page configuration
 st.set_page_config(
+    page_title="🎤 English Accent Analyzer",
     page_icon="🎤",
     layout="wide",
     initial_sidebar_state="expanded"
         font-weight: bold;
         margin-bottom: 2rem;
     }
     .success-box {
         background-color: #d4edda;
         border: 1px solid #c3e6cb;
         st.session_state.analysis_results = None
     if 'processing' not in st.session_state:
         st.session_state.processing = False
 def save_uploaded_file(uploaded_file):
     """Save uploaded file to temporary directory"""
         return None
 def create_confidence_chart(chunk_results):
+    """Create confidence score chart for 1-minute chunks"""
     if not chunk_results:
         return None
     chunk_data = []
+    for i, result in enumerate(chunk_results):
         chunk_data.append({
+            'Minute': f"Min {i+1}",
             'Confidence': result['confidence'],
             'Accent': result['accent'],
+            'Is Confident': '✓ High Confidence' if result['is_confident'] else '✗ Low Confidence'
         })
     df = pd.DataFrame(chunk_data)
     fig = px.bar(df,
+                 x='Minute',
                  y='Confidence',
                  color='Is Confident',
                  hover_data=['Accent'],
+                 title='Confidence Scores by Minute',
+                 color_discrete_map={'✓ High Confidence': '#28a745', '✗ Low Confidence': '#dc3545'})
     fig.update_layout(
+        xaxis_title="Time Segment",
         yaxis_title="Confidence Score",
         showlegend=True,
         height=400
     with col1:
         st.metric(
+            label="🎯 Overall Confidence",
+            value=f"{results['confidence_score']:.1%}",
+            help="Overall confidence in the prediction"
         )
     with col2:
         st.metric(
+            label="📊 Minutes Analyzed",
+            value=f"{results['processed_chunks_count']} min",
+            delta=f"of {results.get('duration_minutes', 0):.1f} min total"
         )
     with col3:
         st.metric(
+            label="✅ High Confidence Segments",
             value=results['confident_chunks_count'],
+            delta=f"{(results['confident_chunks_count']/results['processed_chunks_count']*100):.0f}%" if results['processed_chunks_count'] > 0 else "0%"
         )
     with col4:
         st.metric(
             label="⏱️ Processing Time",
             value=f"{results['processing_time']:.1f}s",
+            help="Time taken to analyze the audio"
         )
     # Detailed Analysis
     with chart_col2:
         confident_chart = create_accent_distribution_chart(
             results['confident_accent_counts'],
+            "High Confidence Predictions"
         )
         if confident_chart:
             st.plotly_chart(confident_chart, use_container_width=True)
+    # Detailed results table
+    with st.expander("🔍 View Minute-by-Minute Results"):
+        if results['chunk_results']:
+            chunk_df = pd.DataFrame(results['chunk_results'])
+            chunk_df.index = [f"Minute {i+1}" for i in range(len(chunk_df))]
+            st.dataframe(chunk_df, use_container_width=True)
     # Summary statistics
     with st.expander("📋 Summary Statistics"):
         col1, col2 = st.columns(2)
         with col1:
+            st.write("**High Confidence Predictions:**")
+            if results['confident_accent_counts']:
+                for accent, count in results['confident_accent_counts'].items():
+                    percentage = (count / results['confident_chunks_count']) * 100
+                    st.write(f"• {accent}: {count} segments ({percentage:.1f}%)")
+            else:
+                st.write("No high confidence predictions")
         with col2:
             st.write("**All Predictions:**")
+            if results['all_accent_counts']:
+                for accent, count in results['all_accent_counts'].items():
+                    percentage = (count / results['processed_chunks_count']) * 100
+                    st.write(f"• {accent}: {count} segments ({percentage:.1f}%)")
 def main():
     """Main Streamlit application"""
     initialize_session_state()
     # Header
+    st.markdown('<h1 class="main-header">🎤 English Accent Analyzer</h1>', unsafe_allow_html=True)
+    st.markdown("Analyze English accents from video files, Loom videos, or direct media URLs. Audio is processed in 1-minute segments for detailed analysis.")
     # Sidebar configuration
     st.sidebar.header("⚙️ Configuration")
         max_value=0.9,
         value=0.6,
         step=0.05,
+        help="Only predictions above this threshold are considered high confidence"
     )
     # Input section
     input_method = st.radio(
         "Choose input method:",
+        ["URL (Loom or Direct Link)", "Upload File"],
         horizontal=True
     )
     source = None
+    if input_method == "URL (Loom or Direct Link)":
         source = st.text_input(
             "Enter video URL:",
+            placeholder="https://www.loom.com/share/...",
+            help="Supports Loom videos and direct media URLs"
         )
         # URL examples
         with st.expander("🔗 Supported URL Examples"):
+            st.write("• **Loom:** `https://www.loom.com/share/VIDEO_ID`")
+            st.write("• **Direct MP4:** `https://example.com/video.mp4`")
+            st.write("• **Direct audio:** `https://example.com/audio.mp3`")
+            st.markdown('<div class="info-box">📝 <strong>Note:</strong> YouTube URLs are not supported to avoid authentication issues in deployment.</div>', unsafe_allow_html=True)
     else:  # Upload File
         uploaded_file = st.file_uploader(
             "Choose a video or audio file",
+            type=['mp4', 'webm', 'avi', 'mov', 'mkv', 'm4v', 'mp3', 'wav', 'm4a', 'aac', 'ogg', 'flac'],
             help="Upload video or audio files for accent analysis"
         )
             # Save uploaded file
             with st.spinner("Saving uploaded file..."):
                 source = save_uploaded_file(uploaded_file)
             if source:
                 st.success(f"✅ File uploaded: {uploaded_file.name}")
+                file_size = len(uploaded_file.getbuffer()) / 1024 / 1024
+                st.info(f"📊 File size: {file_size:.1f}MB")
             else:
                 st.error("❌ Failed to save uploaded file")
     # Analysis button
     analyze_button = st.button(
+        "🚀 Start Accent Analysis",
         type="primary",
         disabled=not source or st.session_state.processing,
         use_container_width=True
         try:
             status_text.text("🎵 Extracting audio...")
+            progress_bar.progress(25)
+            status_text.text("🧩 Creating 1-minute segments...")
+            progress_bar.progress(50)
+            status_text.text("🧠 Analyzing accent patterns...")
+            progress_bar.progress(75)
+            # Run analysis with the confidence threshold
             results = analyze_video_accent(source, confidence_threshold=confidence_threshold)
             progress_bar.progress(100)
     # Display results
     if st.session_state.analysis_results:
+        st.header("📊 Analysis Results")
         display_results(st.session_state.analysis_results)
     # Information section
     with st.expander("ℹ️ About This Tool"):
         st.markdown("""
+        **English Accent Analyzer** uses advanced machine learning models to identify English accents from speech.
+        **Key Features:**
+        - 🎯 **1-minute segments:** Audio is processed in 1-minute chunks for detailed analysis
+        - 🎤 **Accent detection:** Identifies British, American, Australian, and other English accents
+        - 📊 **Confidence scoring:** Provides reliability scores for each prediction
+        - 🔗 **Multiple sources:** Supports Loom videos, direct URLs, and file uploads
         **Supported Formats:**
+        - **Video:** MP4, WebM, AVI, MOV, MKV, M4V
         - **Audio:** MP3, WAV, M4A, AAC, OGG, FLAC
+        - **URLs:** Loom videos, direct media links
         **How it works:**
+        1. Audio is extracted from your source
+        2. Audio is split into 1-minute segments
+        3. Each segment is analyzed for accent characteristics
+        4. Results are combined with confidence weighting
+        5. Final accent prediction is provided
+        **Best Results:**
+        - Use clear speech audio
+        - Longer videos provide more accurate results
+        - Multiple speakers may affect accuracy
         """)
     # Footer
     st.markdown("---")
+    st.markdown("🚀 **Deployment Ready:** Optimized for Hugging Face Spaces deployment")
 if __name__ == "__main__":
     main()

audio_extractor.py CHANGED Viewed

@@ -4,7 +4,6 @@ import tempfile
 import warnings
 import time
 import shutil
-import random
 import requests
 from urllib.parse import urlparse, unquote
 from pathlib import Path
@@ -30,24 +29,14 @@ def suppress_stdout_stderr():
             sys.stdout = old_stdout
             sys.stderr = old_stderr
-class RobustAudioExtractor:
     def __init__(self):
-        self.supported_video_formats = ['.mp4', '.webm', '.avi', '.mov', '.mkv', '.m4v', '.3gp']
         self.supported_audio_formats = ['.mp3', '.wav', '.m4a', '.aac', '.ogg', '.flac']
-        self.user_agents = [
-            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
-        ]
     def extract_audio_from_source(self, source):
-        """
-        Extract audio from various sources:
-        - File path (uploaded file)
-        - Direct media URL (MP4, etc.)
-        - Loom URL
-        - Other video hosting URLs
-        """
         start_time = time.time()
         # Check if source is a file path
@@ -65,9 +54,7 @@ class RobustAudioExtractor:
             print(f"🎥 Processing Loom URL: {source}")
             return self._extract_from_loom(source, start_time)
-        # Try with yt-dlp for other platforms (with robust error handling)
-        print(f"🌐 Processing URL with yt-dlp: {source}")
-        return self._extract_with_ytdlp_robust(source, start_time)
     def _is_file_path(self, source):
         """Check if source is a local file path"""
@@ -95,14 +82,13 @@ class RobustAudioExtractor:
         try:
             file_ext = Path(file_path).suffix.lower()
-            # If it's already an audio file, just return it
             if file_ext in self.supported_audio_formats:
                 if file_ext == '.wav':
                     end_time = time.time()
                     print(f"[⏱️] Audio file processing took {end_time - start_time:.2f} seconds.")
                     return file_path
                 else:
-                    # Convert to WAV
                     return self._convert_to_wav(file_path, start_time)
             # If it's a video file, extract audio
@@ -121,38 +107,30 @@ class RobustAudioExtractor:
         try:
             headers = {
-                'User-Agent': random.choice(self.user_agents),
                 'Accept': '*/*',
                 'Accept-Language': 'en-US,en;q=0.9',
-                'Accept-Encoding': 'gzip, deflate, br',
                 'Connection': 'keep-alive',
-                'Upgrade-Insecure-Requests': '1',
             }
-            response = requests.get(url, headers=headers, stream=True, timeout=30)
             response.raise_for_status()
-            # Determine file extension
-            content_type = response.headers.get('content-type', '').lower()
-            if 'video' in content_type:
-                if 'mp4' in content_type:
                     ext = '.mp4'
-                elif 'webm' in content_type:
-                    ext = '.webm'
-                else:
-                    ext = '.mp4'  # default
-            elif 'audio' in content_type:
-                if 'mpeg' in content_type or 'mp3' in content_type:
                     ext = '.mp3'
-                elif 'wav' in content_type:
-                    ext = '.wav'
                 else:
-                    ext = '.mp3'  # default
-            else:
-                # Try to get from URL
-                parsed_url = urlparse(url)
-                url_ext = Path(parsed_url.path).suffix.lower()
-                ext = url_ext if url_ext in self.supported_video_formats + self.supported_audio_formats else '.mp4'
             downloaded_file = os.path.join(temp_dir, f'downloaded{ext}')
@@ -179,163 +157,55 @@ class RobustAudioExtractor:
                 shutil.rmtree(temp_dir, ignore_errors=True)
             raise Exception(f"Failed to download direct media: {str(e)}")
-    def extract_audio_from_loom(url):
-        """Simple Loom audio extractor using yt-dlp"""
         temp_dir = tempfile.mkdtemp()
-        ydl_opts = {
-            'format': 'bestaudio/best',
-            'postprocessors': [{
-                'key': 'FFmpegExtractAudio',
-                'preferredcodec': 'wav',
-                'preferredquality': '192',
-            }],
-            'outtmpl': os.path.join(temp_dir, 'loom_audio.%(ext)s'),
-            'quiet': True,
-            'no_warnings': True,
-            'noplaylist': True,
-        }
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            ydl.download([url])
-        for f in os.listdir(temp_dir):
-            if f.endswith('.wav'):
-                return os.path.join(temp_dir, f)
-        raise Exception("Audio file not found in output.")
-    def _extract_with_ytdlp_robust(self, url, start_time):
-        """Robust yt-dlp extraction with multiple strategies"""
-        strategies = [
-            self._ytdlp_strategy_basic,
-            self._ytdlp_strategy_with_headers,
-            self._ytdlp_strategy_low_quality,
-            self._ytdlp_strategy_audio_only,
-        ]
-        for i, strategy in enumerate(strategies):
-            try:
-                print(f"Trying yt-dlp strategy {i+1}...")
-                result = strategy(url, start_time)
-                if result:
-                    return result
-                time.sleep(random.uniform(1, 3))
-            except Exception as e:
-                print(f"yt-dlp strategy {i+1} failed: {str(e)}")
-                continue
-        raise Exception("Failed to extract audio with all yt-dlp strategies")
-    def _ytdlp_strategy_basic(self, url, start_time):
-        """Basic yt-dlp strategy"""
-        temp_dir = tempfile.mkdtemp()
-        ydl_opts = {
-            'format': 'bestaudio[abr<=64]/worst',
-            'postprocessors': [{
-                'key': 'FFmpegExtractAudio',
-                'preferredcodec': 'wav',
-                'preferredquality': '192',
-            }],
-            'outtmpl': os.path.join(temp_dir, 'audio.%(ext)s'),
-            'quiet': True,
-            'no_warnings': True,
-            'noplaylist': True,
-        }
-        with suppress_stdout_stderr():
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                ydl.download([url])
-        return self._find_audio_file(temp_dir, start_time)
-    def _ytdlp_strategy_with_headers(self, url, start_time):
-        """yt-dlp with browser-like headers"""
-        temp_dir = tempfile.mkdtemp()
-        ydl_opts = {
-            'format': 'bestaudio[abr<=64]/worst',
-            'postprocessors': [{
-                'key': 'FFmpegExtractAudio',
-                'preferredcodec': 'wav',
-                'preferredquality': '192',
-            }],
-            'outtmpl': os.path.join(temp_dir, 'audio.%(ext)s'),
-            'quiet': True,
-            'no_warnings': True,
-            'noplaylist': True,
-            'http_headers': {
-                'User-Agent': random.choice(self.user_agents),
-                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-                'Accept-Language': 'en-US,en;q=0.9',
-                'Accept-Encoding': 'gzip, deflate',
-                'Connection': 'keep-alive',
-            },
-            'sleep_interval': 1,
-            'max_sleep_interval': 3,
-        }
-        with suppress_stdout_stderr():
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                ydl.download([url])
-        return self._find_audio_file(temp_dir, start_time)
-    def _ytdlp_strategy_low_quality(self, url, start_time):
-        """yt-dlp with lowest quality to avoid detection"""
-        temp_dir = tempfile.mkdtemp()
-        ydl_opts = {
-            'format': 'worstaudio/worst',
-            'postprocessors': [{
-                'key': 'FFmpegExtractAudio',
-                'preferredcodec': 'wav',
-                'preferredquality': '128',
-            }],
-            'outtmpl': os.path.join(temp_dir, 'audio.%(ext)s'),
-            'quiet': True,
-            'no_warnings': True,
-            'noplaylist': True,
-            'sleep_interval': 2,
-            'max_sleep_interval': 5,
-        }
-        with suppress_stdout_stderr():
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                ydl.download([url])
-        return self._find_audio_file(temp_dir, start_time)
-    def _ytdlp_strategy_audio_only(self, url, start_time):
-        """yt-dlp targeting audio-only streams"""
-        temp_dir = tempfile.mkdtemp()
-        ydl_opts = {
-            'format': 'bestaudio',
-            'outtmpl': os.path.join(temp_dir, 'audio.%(ext)s'),
-            'postprocessors': [{
-                'key': 'FFmpegExtractAudio',
-                'preferredcodec': 'wav',
-                'preferredquality': '192',
-            }],
-            'prefer_ffmpeg': True,
-            'ignoreerrors': True,
-            'quiet': True,
-            'no_warnings': True,
-        }
-        with suppress_stdout_stderr():
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                ydl.download([url])
-        return self._find_audio_file(temp_dir, start_time)
     def _extract_audio_from_video_file(self, video_file, start_time):
-        """Extract audio from video file using FFmpeg"""
         temp_dir = tempfile.mkdtemp()
         output_audio = os.path.join(temp_dir, 'extracted_audio.wav')
         try:
             import subprocess
-            # Use FFmpeg to extract audio
             cmd = [
                 'ffmpeg', '-i', video_file,
                 '-vn',  # no video
@@ -353,16 +223,14 @@ class RobustAudioExtractor:
                 print(f"[⏱️] Audio extraction from video took {end_time - start_time:.2f} seconds.")
                 return output_audio
             else:
-                raise Exception(f"FFmpeg failed: {result.stderr}")
-        except FileNotFoundError:
-            # Fallback to torchaudio if FFmpeg not available
             return self._convert_to_wav(video_file, start_time)
-        except Exception as e:
-            raise Exception(f"Failed to extract audio from video: {str(e)}")
     def _convert_to_wav(self, audio_file, start_time):
-        """Convert audio file to WAV format"""
         try:
             waveform, sample_rate = torchaudio.load(audio_file)
@@ -386,65 +254,9 @@ class RobustAudioExtractor:
         except Exception as e:
             raise Exception(f"Failed to convert audio to WAV: {str(e)}")
-    def _find_audio_file(self, directory, start_time):
-        """Find the extracted audio file"""
-        audio_extensions = ['.wav', '.mp3', '.m4a', '.ogg', '.aac']
-        for file in os.listdir(directory):
-            if any(file.lower().endswith(ext) for ext in audio_extensions):
-                audio_path = os.path.join(directory, file)
-                # Convert to WAV if not already
-                if not file.lower().endswith('.wav'):
-                    return self._convert_to_wav(audio_path, start_time)
-                end_time = time.time()
-                print(f"[⏱️] Audio extraction took {end_time - start_time:.2f} seconds.")
-                return audio_path
-        raise Exception("No audio file found after extraction")
-# Update the main function to use the new extractor
-def extract_audio_from_video_url(video_source):
-    """
-    Main function that handles all types of video sources:
-    - File paths (uploaded files)
-    - Direct media URLs
-    - Loom URLs
-    - Other video platform URLs
-    """
-    extractor = RobustAudioExtractor()
-    return extractor.extract_audio_from_source(video_source)
-# Keep the existing chunking functions unchanged
-def smart_chunk_audio(waveform, sample_rate, duration_minutes):
-    """Smart chunking based on video duration"""
-    total_duration = waveform.size(1) / sample_rate
-    print(f"📏 Video duration: {total_duration/60:.1f} minutes")
-    if duration_minutes <= 1:
-        # Short videos: smaller chunks, process all
-        chunk_length_sec = 10
-        return chunk_audio_all(waveform, sample_rate, chunk_length_sec)
-    elif duration_minutes <= 5:
-        # Medium videos: normal chunks, skip some randomly
-        chunk_length_sec = 20
-        all_chunks = chunk_audio_all(waveform, sample_rate, chunk_length_sec)
-        # Keep 70% of chunks randomly
-        keep_ratio = 0.7
-        num_keep = max(1, int(len(all_chunks) * keep_ratio))
-        selected_chunks = random.sample(all_chunks, num_keep)
-        print(f"📦 Selected {len(selected_chunks)} out of {len(all_chunks)} chunks")
-        return selected_chunks
-    else:
-        # Long videos: strategic sampling from beginning, middle, end
-        chunk_length_sec = 25
-        return chunk_audio_strategic(waveform, sample_rate, chunk_length_sec)
-def chunk_audio_all(waveform, sample_rate, chunk_length_sec=20):
-    """Create all chunks from audio"""
     chunk_samples = chunk_length_sec * sample_rate
     total_samples = waveform.size(1)
     chunks = []
@@ -452,56 +264,19 @@ def chunk_audio_all(waveform, sample_rate, chunk_length_sec=20):
     for start in range(0, total_samples, chunk_samples):
         end = min(start + chunk_samples, total_samples)
         chunk = waveform[:, start:end]
-        if chunk.size(1) > sample_rate * 3:  # ignore very short chunks (3 sec minimum)
-            chunks.append(chunk)
-    return chunks
-def chunk_audio_strategic(waveform, sample_rate, chunk_length_sec=25):
-    """Strategic chunking for long videos - sample from beginning, middle, end
-
-    Args:
-        waveform: audio data sliced as ``waveform[:, start:end]`` and measured
-            with ``waveform.size(1)`` (presumably a torch tensor shaped
-            (channels, samples) -- confirm against the caller).
-        sample_rate: samples per second of ``waveform``.
-        chunk_length_sec: length of each sampled window in seconds.
-
-    Returns:
-        List of waveform slices taken from the start, around the midpoint and
-        from the end. Slices of 3 seconds or less are dropped. The three
-        groups are computed independently, so windows can overlap when the
-        audio is short relative to ``chunk_length_sec``.
-    """
-    total_samples = waveform.size(1)
-    chunk_samples = chunk_length_sec * sample_rate
-    chunks = []
-    # Beginning: up to 3 consecutive chunks (fewer if the audio holds fewer full chunks)
-    beginning_chunks = min(3, total_samples // chunk_samples)
-    for i in range(beginning_chunks):
-        start = i * chunk_samples
-        end = min(start + chunk_samples, total_samples)
-        chunk = waveform[:, start:end]
-        if chunk.size(1) > sample_rate * 3:
             chunks.append(chunk)
-    # Middle: 2 consecutive chunks starting one chunk before the midpoint
-    middle_start = total_samples // 2 - chunk_samples
-    middle_chunks = min(3, 2)  # always evaluates to 2
-    for i in range(middle_chunks):
-        start = middle_start + (i * chunk_samples)
-        end = min(start + chunk_samples, total_samples)
-        # A negative start (audio shorter than two chunks) is skipped, not clamped.
-        if start >= 0 and start < total_samples:
-            chunk = waveform[:, start:end]
-            if chunk.size(1) > sample_rate * 3:
-                chunks.append(chunk)
-    # End: 3 consecutive chunks covering the last 3 * chunk_samples samples
-    end_start = total_samples - (3 * chunk_samples)
-    end_chunks = min(3, 3)  # always evaluates to 3
-    for i in range(end_chunks):
-        # NOTE(review): clamping to 0 means several iterations can produce the
-        # same window when the audio is shorter than 3 chunks.
-        start = max(0, end_start + (i * chunk_samples))
-        end = min(start + chunk_samples, total_samples)
-        if start < total_samples:
-            chunk = waveform[:, start:end]
-            if chunk.size(1) > sample_rate * 3:
-                chunks.append(chunk)
-    print(f"📦 Strategic sampling: {len(chunks)} chunks from long video")
     return chunks
 def prepare_audio(video_source):
-    """Main function to extract and prepare audio chunks"""
     try:
         print(f"🎵 Extracting audio from source...")
-        audio_path = extract_audio_from_video_url(video_source)
         print(f"✅ Audio extracted to: {audio_path}")
         print(f"🎯 Loading and preparing audio...")
@@ -520,14 +295,14 @@ def prepare_audio(video_source):
         end = time.time()
         print(f"[⏱️] Audio preparation took {end - start:.2f} seconds.")
-        # Calculate duration and apply smart chunking
         duration_minutes = waveform.size(1) / sample_rate / 60
-        print(f"🧩 Smart chunking based on duration...")
         start = time.time()
-        chunks = smart_chunk_audio(waveform, sample_rate, duration_minutes)
         end = time.time()
-        print(f"[⏱️] Smart chunking took {end - start:.2f} seconds. Total chunks: {len(chunks)}")
         return {
             "success": True,

 import warnings
 import time
 import shutil
 import requests
 from urllib.parse import urlparse, unquote
 from pathlib import Path
             sys.stdout = old_stdout
             sys.stderr = old_stderr
+class SimpleAudioExtractor:
     def __init__(self):
+        """Set up the extension lists and the HTTP User-Agent used by the extractor."""
+        # Lowercase, dot-prefixed suffixes; compared against Path(...).suffix.lower().
+        self.supported_video_formats = ['.mp4', '.webm', '.avi', '.mov', '.mkv', '.m4v']
         self.supported_audio_formats = ['.mp3', '.wav', '.m4a', '.aac', '.ogg', '.flac']
+        # Sent as the User-Agent header for direct downloads and for yt-dlp requests.
+        self.user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
     def extract_audio_from_source(self, source):
+        """Extract audio from file path, direct media URL, or Loom URL"""
         start_time = time.time()
         # Check if source is a file path
             print(f"🎥 Processing Loom URL: {source}")
             return self._extract_from_loom(source, start_time)
+        raise Exception("Unsupported URL format. Please use Loom URLs or direct media links.")
     def _is_file_path(self, source):
         """Check if source is a local file path"""
         try:
             file_ext = Path(file_path).suffix.lower()
+            # If it's already an audio file, convert to WAV if needed
             if file_ext in self.supported_audio_formats:
                 if file_ext == '.wav':
                     end_time = time.time()
                     print(f"[⏱️] Audio file processing took {end_time - start_time:.2f} seconds.")
                     return file_path
                 else:
                     return self._convert_to_wav(file_path, start_time)
             # If it's a video file, extract audio
         try:
             headers = {
+                'User-Agent': self.user_agent,
                 'Accept': '*/*',
                 'Accept-Language': 'en-US,en;q=0.9',
                 'Connection': 'keep-alive',
             }
+            response = requests.get(url, headers=headers, stream=True, timeout=60)
             response.raise_for_status()
+            # Determine file extension from URL or content type
+            parsed_url = urlparse(url)
+            url_ext = Path(parsed_url.path).suffix.lower()
+            if url_ext in self.supported_video_formats + self.supported_audio_formats:
+                ext = url_ext
+            else:
+                # Try to get from content type
+                content_type = response.headers.get('content-type', '').lower()
+                if 'video' in content_type:
                     ext = '.mp4'
+                elif 'audio' in content_type:
                     ext = '.mp3'
                 else:
+                    ext = '.mp4'  # default
             downloaded_file = os.path.join(temp_dir, f'downloaded{ext}')
                 shutil.rmtree(temp_dir, ignore_errors=True)
             raise Exception(f"Failed to download direct media: {str(e)}")
+    def _extract_from_loom(self, url, start_time):
+        """Extract audio from Loom URL using yt-dlp
+
+        Downloads into a fresh temporary directory and asks yt-dlp's
+        FFmpegExtractAudio postprocessor for WAV output.
+
+        Args:
+            url: URL passed to ``YoutubeDL.download``.
+            start_time: ``time.time()`` value taken by the caller; used only
+                to print the elapsed time.
+
+        Returns:
+            Path of the first ``.wav`` file found in the temporary directory.
+            The directory is not removed on success; the returned file lives
+            inside it.
+
+        Raises:
+            Exception: on any failure, after the temporary directory has been
+                removed. The message is prefixed with
+                "Failed to extract from Loom: ".
+        """
+        temp_dir = tempfile.mkdtemp()
+        try:
+            ydl_opts = {
+                'format': 'bestaudio/best',
+                # Post-process the download into a WAV file.
+                'postprocessors': [{
+                    'key': 'FFmpegExtractAudio',
+                    'preferredcodec': 'wav',
+                    'preferredquality': '192',
+                }],
+                # yt-dlp fills in %(ext)s, so the final name is loom_audio.<ext>.
+                'outtmpl': os.path.join(temp_dir, 'loom_audio.%(ext)s'),
+                'quiet': True,
+                'no_warnings': True,
+                'noplaylist': True,
+                'http_headers': {
+                    'User-Agent': self.user_agent,
+                },
+            }
+            # suppress_stdout_stderr is defined elsewhere in this file;
+            # presumably it silences yt-dlp's console output -- confirm there.
+            with suppress_stdout_stderr():
+                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                    ydl.download([url])
+            # Find the extracted audio file
+            for file in os.listdir(temp_dir):
+                if file.endswith('.wav'):
+                    audio_path = os.path.join(temp_dir, file)
+                    end_time = time.time()
+                    print(f"[⏱️] Loom audio extraction took {end_time - start_time:.2f} seconds.")
+                    return audio_path
+            # Raised inside the try block, so the handler below cleans up and re-wraps it.
+            raise Exception("Audio file not found after Loom extraction")
+        except Exception as e:
+            # Failure path only: nothing is returned, so drop the temporary directory.
+            if os.path.exists(temp_dir):
+                shutil.rmtree(temp_dir, ignore_errors=True)
+            raise Exception(f"Failed to extract from Loom: {str(e)}")
     def _extract_audio_from_video_file(self, video_file, start_time):
+        """Extract audio from video file using FFmpeg or torchaudio"""
         temp_dir = tempfile.mkdtemp()
         output_audio = os.path.join(temp_dir, 'extracted_audio.wav')
         try:
+            # Try FFmpeg first
             import subprocess
             cmd = [
                 'ffmpeg', '-i', video_file,
                 '-vn',  # no video
                 print(f"[⏱️] Audio extraction from video took {end_time - start_time:.2f} seconds.")
                 return output_audio
             else:
+                raise Exception("FFmpeg failed, trying torchaudio...")
+        except (FileNotFoundError, Exception):
+            # Fallback to torchaudio
             return self._convert_to_wav(video_file, start_time)
     def _convert_to_wav(self, audio_file, start_time):
+        """Convert audio file to WAV format using torchaudio"""
         try:
             waveform, sample_rate = torchaudio.load(audio_file)
         except Exception as e:
             raise Exception(f"Failed to convert audio to WAV: {str(e)}")
+def chunk_audio_1min(waveform, sample_rate):
+    """Create 1-minute chunks from audio
+
+    Args:
+        waveform: audio data sliced as ``waveform[:, start:end]`` and measured
+            with ``waveform.size(1)`` (presumably a torch tensor shaped
+            (channels, samples) -- confirm against the caller).
+        sample_rate: samples per second of ``waveform``; must be an integer
+            because it is used to build the ``range`` step.
+
+    Returns:
+        List of consecutive, non-overlapping slices of up to 60 seconds each.
+        A trailing slice shorter than 10 seconds is dropped, so audio shorter
+        than 10 seconds yields an empty list.
+    """
+    chunk_length_sec = 60  # 1 minute chunks
     chunk_samples = chunk_length_sec * sample_rate
     total_samples = waveform.size(1)
     chunks = []
     for start in range(0, total_samples, chunk_samples):
         end = min(start + chunk_samples, total_samples)
         chunk = waveform[:, start:end]
+        # Only include chunks that are at least 10 seconds long
+        # (>= so that a tail of exactly 10 seconds is kept, as stated).
+        if chunk.size(1) >= sample_rate * 10:
             chunks.append(chunk)
+    print(f"📦 Created {len(chunks)} 1-minute chunks")
     return chunks
 def prepare_audio(video_source):
+    """Main function to extract and prepare 1-minute audio chunks"""
     try:
         print(f"🎵 Extracting audio from source...")
+        extractor = SimpleAudioExtractor()
+        audio_path = extractor.extract_audio_from_source(video_source)
         print(f"✅ Audio extracted to: {audio_path}")
         print(f"🎯 Loading and preparing audio...")
         end = time.time()
         print(f"[⏱️] Audio preparation took {end - start:.2f} seconds.")
+        # Calculate duration and create 1-minute chunks
         duration_minutes = waveform.size(1) / sample_rate / 60
+        print(f"🧩 Creating 1-minute chunks...")
         start = time.time()
+        chunks = chunk_audio_1min(waveform, sample_rate)
         end = time.time()
+        print(f"[⏱️] Chunking took {end - start:.2f} seconds. Total chunks: {len(chunks)}")
         return {
             "success": True,