Spaces:

JaganathC
/

Video_Summ

Running

App Files Files Community

JaganathC committed on Mar 15, 2025

Commit

58ddc5a

verified ·

1 Parent(s): fff62d1

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -35

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ import json
 import time
 import langdetect
 import uuid
-from transformers import AutoTokenizer, AutoModelForCausalLM
 # Load Hugging Face Token
 HF_TOKEN = os.getenv("HF_TOKEN")
@@ -15,12 +15,19 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 print("Starting the program...")
 model_path = "Qwen/Qwen2.5-7B-Instruct"
-# Check if CUDA is available
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, trust_remote_code=True).to(device).eval()
 print("Model successfully loaded.")
 def generate_unique_filename(extension):
@@ -33,40 +40,42 @@ def cleanup_files(*files):
             print(f"Removed file: {file}")
 def download_youtube_audio(url):
     print(f"Downloading audio from YouTube: {url}")
     output_path = generate_unique_filename(".wav")
     ydl_opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
             'key': 'FFmpegExtractAudio',
             'preferredcodec': 'wav',
         }],
         'outtmpl': output_path,
     }
     try:
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download([url])
     except Exception as e:
         return f"Error downloading audio: {str(e)}"
-    if os.path.exists(output_path + ".wav"):
-        os.rename(output_path + ".wav", output_path)
-    return output_path
 def transcribe_audio(file_path):
     print(f"Starting transcription of file: {file_path}")
     temp_audio = None
     if file_path.endswith(('.mp4', '.avi', '.mov', '.flv')):
         print("Video file detected. Extracting audio using ffmpeg...")
         temp_audio = generate_unique_filename(".wav")
         command = ["ffmpeg", "-i", file_path, "-q:a", "0", "-map", "a", temp_audio]
         subprocess.run(command, check=True)
-        file_path = temp_audio
     output_file = generate_unique_filename(".json")
     command = [
         "insanely-fast-whisper",
@@ -77,47 +86,61 @@ def transcribe_audio(file_path):
         "--timestamp", "chunk",
         "--transcript-path", output_file
     ]
     try:
         subprocess.run(command, check=True)
     except Exception as e:
         return f"Error in transcription: {str(e)}"
-    with open(output_file, "r") as f:
-        transcription = json.load(f)
-    result = transcription.get("text", " ".join([chunk["text"] for chunk in transcription.get("chunks", [])]))
     cleanup_files(output_file)
     if temp_audio:
         cleanup_files(temp_audio)
-    return result
 def generate_summary_stream(transcription):
-    detected_language = langdetect.detect(transcription)
-    prompt = f"""Summarize the following video transcription in 150-300 words in {detected_language}:
-    {transcription[:300000]}..."""
-    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
-    output_ids = model.generate(input_ids, max_length=500)
-    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    return response
 def process_youtube(url):
     if not url:
         return "Please enter a YouTube URL.", None
     audio_file = download_youtube_audio(url)
-    if "Error" in audio_file:
         return audio_file, None
     transcription = transcribe_audio(audio_file)
-    cleanup_files(audio_file)
     return transcription, None
 def process_uploaded_video(video_path):
     transcription = transcribe_audio(video_path)
     return transcription, None
@@ -126,7 +149,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     # 🎥 Video Transcription and Smart Summary
     Upload a video or provide a YouTube link to get a transcription and AI-generated summary.
     """)
     with gr.Tabs():
         with gr.TabItem("📤 Video Upload"):
             video_input = gr.Video()
@@ -135,11 +158,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         with gr.TabItem("🔗 YouTube Link"):
             url_input = gr.Textbox(placeholder="https://www.youtube.com/watch?v=...")
             url_button = gr.Button("🚀 Process URL")
     transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
     summary_output = gr.Textbox(label="📊 Summary", lines=10, show_copy_button=True)
     summary_button = gr.Button("📝 Generate Summary")
     video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
     url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
     summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])

 import time
 import langdetect
 import uuid
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 # Load Hugging Face Token
 HF_TOKEN = os.getenv("HF_TOKEN")
 print("Starting the program...")
 model_path = "Qwen/Qwen2.5-7B-Instruct"
+# **Efficient Model Loading**
+bnb_config = BitsAndBytesConfig(load_in_8bit=True)  # Use 8-bit precision to reduce memory usage
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+if device == "cuda":
+    # Quantized weights are placed on the GPU by `device_map` at load time.
+    # Calling `.to(device)` afterwards is rejected by transformers for 8-bit
+    # bitsandbytes models in many releases, so it is deliberately not used here.
+    model = AutoModelForCausalLM.from_pretrained(
+        model_path,
+        torch_dtype=torch.float16,
+        quantization_config=bnb_config,  # Load in 8-bit to save memory
+        device_map="auto",
+        trust_remote_code=True,
+    )
+else:
+    # bitsandbytes 8-bit quantization targets CUDA GPUs, so on CPU the model is
+    # loaded unquantized in float32 instead of failing at startup.
+    model = AutoModelForCausalLM.from_pretrained(
+        model_path,
+        torch_dtype=torch.float32,
+        trust_remote_code=True,
+    ).to(device)
+model.eval()  # Inference only: disables dropout and similar training-time layers
 print("Model successfully loaded.")
 def generate_unique_filename(extension):
             print(f"Removed file: {file}")
 def download_youtube_audio(url):
+    """Download the audio track of a YouTube video as a WAV file.
+
+    The audio is fetched with yt-dlp and converted to WAV by its
+    `FFmpegExtractAudio` post-processor.
+
+    Args:
+        url: URL of the video to download.
+
+    Returns:
+        The path of the WAV file on success. On failure a message string is
+        returned instead of raising: "Error downloading audio: ..." when
+        yt-dlp (or the rename) raises, or "Download Failed" when no file
+        exists at the expected path afterwards.
+    """
     print(f"Downloading audio from YouTube: {url}")
     output_path = generate_unique_filename(".wav")
     ydl_opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
             'key': 'FFmpegExtractAudio',
             'preferredcodec': 'wav',
+            # NOTE(review): bitrate hint for the extractor; presumably has no
+            # effect on uncompressed WAV output - confirm against yt-dlp docs.
+            'preferredquality': '192',
         }],
         'outtmpl': output_path,
     }
     try:
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download([url])
+        # Handles the case where the result was written as "<output_path>.wav"
+        # (a doubled extension) by moving it back to the expected name.
+        if os.path.exists(output_path + ".wav"):
+            os.rename(output_path + ".wav", output_path)  # Ensure correct naming
     except Exception as e:
         return f"Error downloading audio: {str(e)}"
+    return output_path if os.path.exists(output_path) else "Download Failed"
 def transcribe_audio(file_path):
+    """Transcribe an audio or video file with the `insanely-fast-whisper` CLI.
+
+    Paths ending in .mp4, .avi, .mov or .flv first have their audio extracted
+    to a temporary WAV file with ffmpeg. The CLI writes its transcript to a
+    temporary JSON file, which is read back and then deleted.
+
+    Args:
+        file_path: Path of the audio or video file to transcribe.
+
+    Returns:
+        The transcribed text, truncated to 500,000 characters, or a message
+        string starting with "Error" when the CLI call or reading the
+        transcript file fails.
+
+    Raises:
+        subprocess.CalledProcessError: If the ffmpeg extraction step fails;
+            that call is not wrapped in a try/except.
+    """
     print(f"Starting transcription of file: {file_path}")
     temp_audio = None
     if file_path.endswith(('.mp4', '.avi', '.mov', '.flv')):
         print("Video file detected. Extracting audio using ffmpeg...")
         temp_audio = generate_unique_filename(".wav")
         command = ["ffmpeg", "-i", file_path, "-q:a", "0", "-map", "a", temp_audio]
         subprocess.run(command, check=True)
+        file_path = temp_audio  # Use extracted audio file
     output_file = generate_unique_filename(".json")
     command = [
         "insanely-fast-whisper",
         "--timestamp", "chunk",
         "--transcript-path", output_file
     ]
     try:
         subprocess.run(command, check=True)
     except Exception as e:
         return f"Error in transcription: {str(e)}"
+    # Each line of the transcript file is parsed as an independent JSON
+    # document and its "text" field is collected.
+    # NOTE(review): this only works when the file is single-line JSON or JSON
+    # Lines; a pretty-printed (multi-line) JSON file would fail to parse here.
+    # Confirm the output format of insanely-fast-whisper.
+    result = []
+    try:
+        with open(output_file, "r") as f:
+            for line in f:
+                chunk = json.loads(line.strip())  # One JSON document per line
+                result.append(chunk.get("text", ""))
+    except Exception as e:
+        return f"Error reading transcription file: {str(e)}"
     # NOTE(review): the early error returns above skip this cleanup, leaving
     # the temporary files on disk.
     cleanup_files(output_file)
     if temp_audio:
         cleanup_files(temp_audio)
+    return " ".join(result)[:500000]  # Limit transcription size
 def generate_summary_stream(transcription):
+    """Summarize the beginning of a transcription with the loaded language model.
+
+    The text is split into 2000-character chunks and only the first three are
+    summarized, to bound memory use. Each chunk is summarized independently
+    and the per-chunk summaries are joined with blank lines.
+
+    Args:
+        transcription: Transcribed text to summarize.
+
+    Returns:
+        The concatenated chunk summaries, or a short message when there is no
+        text to summarize.
+    """
+    # The Gradio textbox can be empty; language detection raises on text with
+    # no usable features, so bail out before touching the model.
+    if not transcription or not transcription.strip():
+        return "No transcription available to summarize."
+    try:
+        detected_language = langdetect.detect(transcription[:1000])  # Detect using a smaller portion
+    except langdetect.LangDetectException:
+        detected_language = "en"  # e.g. digits/punctuation only; fall back to English
+    # Use smaller chunks for processing
+    chunk_size = 2000
+    max_chunks = 3  # Process only the first chunks to avoid OOM
+    # Budget for generated tokens only: 150-300 words needs more than 300 tokens.
+    max_new_tokens = 400
+    transcript_chunks = [transcription[i:i+chunk_size] for i in range(0, len(transcription), chunk_size)]
+    summary_result = []
+    for chunk in transcript_chunks[:max_chunks]:
+        prompt = f"""Summarize the following video transcription in 150-300 words in {detected_language}:\n{chunk}"""
+        # Keep the attention mask together with the input ids and move both to
+        # wherever the model's weights live.
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        # `max_length` would also count the prompt tokens, which can exceed the
+        # limit on their own for a 2000-character chunk; `max_new_tokens` caps
+        # only the generated continuation.
+        output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
+        # Causal LMs return prompt + continuation; drop the echoed prompt.
+        prompt_length = inputs["input_ids"].shape[1]
+        response = tokenizer.decode(output_ids[0][prompt_length:], skip_special_tokens=True)
+        summary_result.append(response.strip())
+    return "\n\n".join(summary_result)
 def process_youtube(url):
+    """Download a YouTube video's audio, transcribe it, and delete the download.
+
+    Args:
+        url: YouTube URL; an empty value returns a prompt message instead.
+
+    Returns:
+        A `(text, None)` tuple. `text` is the transcription, or a message
+        string when the URL is missing or the download failed. The second
+        element is always None.
+    """
     if not url:
         return "Please enter a YouTube URL.", None
     audio_file = download_youtube_audio(url)
+    # download_youtube_audio reports failure through its return string rather
+    # than by raising, so the result is inspected before being used as a path.
+    if "Error" in audio_file or audio_file == "Download Failed":
         return audio_file, None
     transcription = transcribe_audio(audio_file)
+    cleanup_files(audio_file)  # Clean up the downloaded file
     return transcription, None
 def process_uploaded_video(video_path):
+    """Transcribe an uploaded video file.
+
+    Args:
+        video_path: Path of the uploaded video, or None/empty when the button
+            is clicked without an upload.
+
+    Returns:
+        A `(text, None)` tuple. `text` is the transcription, or a prompt
+        message when no video was provided. The second element is always None.
+    """
+    # Without this guard a missing upload reaches `file_path.endswith(...)` in
+    # transcribe_audio and fails with AttributeError on None.
+    if not video_path:
+        return "Please upload a video file.", None
     transcription = transcribe_audio(video_path)
     return transcription, None
     # 🎥 Video Transcription and Smart Summary
     Upload a video or provide a YouTube link to get a transcription and AI-generated summary.
     """)
     with gr.Tabs():
         with gr.TabItem("📤 Video Upload"):
             video_input = gr.Video()
         with gr.TabItem("🔗 YouTube Link"):
             url_input = gr.Textbox(placeholder="https://www.youtube.com/watch?v=...")
             url_button = gr.Button("🚀 Process URL")
     transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
     summary_output = gr.Textbox(label="📊 Summary", lines=10, show_copy_button=True)
     summary_button = gr.Button("📝 Generate Summary")
     video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
     url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
     summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])