Spaces:

samuelolubukun
/

contentnova-video-worker

Sleeping

App Files Community

samuelolubukun committed on Feb 12

Commit

2fe27d2

verified ·

1 Parent(s): 0aae98e

Upload 2 files

Browse files

Files changed (2) hide show

app.py +3 -111
requirements.txt +0 -2

app.py CHANGED Viewed

@@ -14,12 +14,8 @@ import traceback
 import asyncio
 import httpx
 import re
-import pytube
-from pytube import YouTube
-from pytube.cli import on_progress
-import speech_recognition as sr
 from pydub import AudioSegment
-from pydub.silence import split_on_silence
 load_dotenv()
@@ -147,9 +143,6 @@ async def global_exception_handler(request, exc):
 class VideoURL(BaseModel):
     video_url: str
-class YouTubeURL(BaseModel):
-    url: str
 class ProcessVideoOptions(BaseModel):
     video_url: str
     aspect_ratio: str = "16:9"
@@ -379,106 +372,5 @@ def extract_audio(data: VideoURL):
     finally:
         for p in [local_input, local_output]:
             if os.path.exists(p): os.remove(p)
-@app.post("/extract-youtube-transcript")
-def extract_youtube_transcript(data: YouTubeURL):
-    """Extract transcript from a YouTube video by downloading audio and transcribing."""
-    local_audio = os.path.join(TEMP_DIR, f"yt_audio_{uuid.uuid4()}.wav")
-    local_mp3 = os.path.join(TEMP_DIR, f"yt_audio_{uuid.uuid4()}.mp3")
-    try:
-        # 1. Extract video ID
-        video_id = None
-        regexes = [
-            r'(?:v=|\/)([0-9A-Za-z_-]{11}).*',
-            r'youtu\.be\/([0-9A-Za-z_-]{11})',
-            r'embed\/([0-9A-Za-z_-]{11})'
-        ]
-        for regex in regexes:
-            match = re.search(regex, data.url)
-            if match:
-                video_id = match.group(1)
-                break
-        if not video_id:
-            raise HTTPException(status_code=400, detail="Invalid YouTube URL")
-        print(f"Downloading audio for video ID: {video_id}")
-        # 2. Download audio using pytube
-        yt = YouTube(data.url, on_progress_callback=on_progress)
-        # Target the audio stream (highest quality audio-only usually)
-        audio_stream = yt.streams.get_audio_only()
-        if not audio_stream:
-            raise HTTPException(status_code=500, detail="No audio streams found for this video")
-        print(f"Downloading stream: {audio_stream.abr}")
-        downloaded_file = audio_stream.download(output_path=TEMP_DIR, filename=f"yt_audio_{uuid.uuid4()}.mp3")
-        actual_mp3_path = downloaded_file
-        if not os.path.exists(actual_mp3_path):
-            raise HTTPException(status_code=500, detail="Failed to download YouTube audio")
-        # 3. Convert to WAV (16k, mono) for SpeechRecognition
-        print(f"Converting {actual_mp3_path} to WAV...")
-        cmd = [
-            "ffmpeg", "-i", actual_mp3_path,
-            "-ar", "16000", "-ac", "1", "-f", "wav",
-            local_audio
-        ]
-        run_ffmpeg(cmd)
-        # 4. Transcribe using SpeechRecognition
-        print("Transcribing audio...")
-        recognizer = sr.Recognizer()
-        # Load audio with pydub to handle potentially long files by chunking
-        audio = AudioSegment.from_wav(local_audio)
-        # Define chunk size (e.g., 30 seconds)
-        chunk_length_ms = 30000
-        chunks = [audio[i:i + chunk_length_ms] for i in range(0, len(audio), chunk_length_ms)]
-        full_transcript = []
-        formatted_segments = []
-        for i, chunk in enumerate(chunks):
-            chunk_silent = AudioSegment.silent(duration=500) # add half second silence for padding
-            chunk_with_padding = chunk_silent + chunk + chunk_silent
-            chunk_path = os.path.join(TEMP_DIR, f"chunk_{i}_{uuid.uuid4()}.wav")
-            chunk_with_padding.export(chunk_path, format="wav")
-            with sr.AudioFile(chunk_path) as source:
-                audio_data = recognizer.record(source)
-                try:
-                    # Using Google Web Speech API (free, no key needed for small use)
-                    text = recognizer.recognize_google(audio_data)
-                    full_transcript.append(text)
-                    formatted_segments.append({
-                        "text": text,
-                        "offset": i * chunk_length_ms,
-                        "duration": chunk_length_ms
-                    })
-                except sr.UnknownValueError:
-                    print(f"Chunk {i}: Speech was unintelligible")
-                except sr.RequestError as e:
-                    print(f"Chunk {i}: Could not request results from Google Speech Recognition service; {e}")
-                finally:
-                    if os.path.exists(chunk_path): os.remove(chunk_path)
-        return {
-            "success": True,
-            "videoId": video_id,
-            "transcript": " ".join(full_transcript),
-            "segments": formatted_segments
-        }
-    except Exception as e:
-        print(f"Extraction error: {str(e)}")
-        print(traceback.format_exc())
-        raise HTTPException(status_code=500, detail=str(e))
-    finally:
-        for p in [local_audio, local_mp3]:
-            if os.path.exists(p): os.remove(p)

 import asyncio
 import httpx
 import re
 from pydub import AudioSegment
+# Note: pydub used to be used for chunking YouTube audio, but that logic is now removed.
 load_dotenv()
 class VideoURL(BaseModel):
     video_url: str
 class ProcessVideoOptions(BaseModel):
     video_url: str
     aspect_ratio: str = "16:9"
     finally:
         for p in [local_input, local_output]:
             if os.path.exists(p): os.remove(p)
+        if os.path.exists(local_input):
+            os.remove(local_input)

requirements.txt CHANGED Viewed

@@ -6,6 +6,4 @@ python-dotenv==1.0.1
 pydantic==2.6.1
 requests==2.31.0
 httpx==0.26.0
-pytubefix==6.1.1
-SpeechRecognition==3.10.1
 pydub==0.25.1

 pydantic==2.6.1
 requests==2.31.0
 httpx==0.26.0
 pydub==0.25.1