Spaces:

JaganathC
/

Video_Summ

Running

App Files Files Community

JaganathC committed on Mar 15, 2025

Commit

9431860

verified ·

1 Parent(s): 11bb14f

Update app.py

Browse files

Files changed (1) hide show

app.py +137 -116

app.py CHANGED Viewed

@@ -1,132 +1,153 @@
-import gradio as gr
-import torch
-import yt_dlp
-import os
-import subprocess
-import json
-import moviepy.editor as mp
 import time
-import langdetect
-import uuid
-from transformers import AutoTokenizer, AutoModelForCausalLM
-# Load Hugging Face Model
-HF_TOKEN = os.environ.get("HF_TOKEN")
-model_path = "Qwen/Qwen2.5-7B-Instruct"
-print(f"Loading model {model_path}...")
-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True).cuda()
-model = model.eval()
-print("Model successfully loaded.")
-# Generate unique filenames
-def generate_unique_filename(extension):
-    return f"{uuid.uuid4()}{extension}"
-# Cleanup temporary files
-def cleanup_files(*files):
-    for file in files:
-        if file and os.path.exists(file):
-            os.remove(file)
-            print(f"Removed file: {file}")
-# Extract audio from video
-def extract_audio(video_path):
-    audio_path = generate_unique_filename(".wav")
-    try:
-        video = mp.VideoFileClip(video_path)
-        video.audio.write_audiofile(audio_path)
-        return audio_path
-    except Exception as e:
-        print(f"Error extracting audio: {e}")
-        return None
-# Download YouTube audio
-def download_youtube_audio(url):
-    output_path = generate_unique_filename(".wav")
-    ydl_opts = {
-        'format': 'bestaudio/best',
-        'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav'}],
-        'outtmpl': output_path,
-        'keepvideo': True,
-    }
-    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-        ydl.download([url])
-    return output_path if os.path.exists(output_path) else None
-# Transcribe audio using Whisper
-def transcribe_audio(file_path):
-    if file_path.endswith(('.mp4', '.avi', '.mov', '.flv')):
-        file_path = extract_audio(file_path)
-        if not file_path:
-            return "Audio extraction failed.", None
-    output_file = generate_unique_filename(".json")
-    command = [
-        "insanely-fast-whisper", "--file-name", file_path,
-        "--device-id", "cpu", "--model-name", "openai/whisper-large-v3",
-        "--task", "transcribe", "--timestamp", "chunk",
-        "--transcript-path", output_file
-    ]
-    result = subprocess.run(command, capture_output=True, text=True)
-    if result.returncode != 0:
-        return f"Transcription failed: {result.stderr}", None
-    if not os.path.exists(output_file):
-        return "Transcription file missing.", None
-    with open(output_file, "r") as f:
-        transcription = json.load(f)
-    text = transcription.get("text", " ".join([chunk["text"] for chunk in transcription.get("chunks", [])]))
-    cleanup_files(output_file, file_path)
-    return text, None
-# Generate summary using Qwen Model
-def generate_summary(transcription):
-    detected_language = langdetect.detect(transcription)
-    prompt = f"""Summarize the following transcription in 150-300 words:
-    Language: {detected_language}
-    {transcription[:100000]}"""
-    response, _ = model.chat(tokenizer, prompt, history=[])
-    return response
-# Process YouTube video
-def process_youtube(url):
-    if not url:
-        return "Please enter a valid YouTube URL.", None
-    audio_file = download_youtube_audio(url)
-    return transcribe_audio(audio_file) if audio_file else ("Download failed.", None)
-# Process uploaded video
-def process_uploaded_video(video_path):
-    return transcribe_audio(video_path)
-# Gradio Interface
-demo = gr.Blocks()
-with demo:
-    gr.Markdown("""
-        # 🎥 AI Video Transcription & Summary
-        Upload a video or provide a YouTube link to get a transcription and AI-generated summary.
-    """)
-    with gr.Tabs():
-        with gr.TabItem("📤 Video Upload"):
-            video_input = gr.File(label="Upload a video file")
-            video_button = gr.Button("🚀 Process Video")
-        with gr.TabItem("🔗 YouTube Link"):
-            url_input = gr.Textbox(label="Paste YouTube URL")
-            url_button = gr.Button("🚀 Process URL")
-    transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
-    summary_output = gr.Textbox(label="📊 Summary", lines=10, show_copy_button=True)
-    summary_button = gr.Button("📝 Generate Summary")
-    video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
-    url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
-    summary_button.click(generate_summary, inputs=[transcription_output], outputs=[summary_output])
-demo.launch()

+import streamlit as st
+from phi.agent import Agent
+from phi.model.google import Gemini
+from phi.tools.duckduckgo import DuckDuckGo
+from google.generativeai import upload_file, get_file
+import google.generativeai as genai
 import time
+from pathlib import Path
+import tempfile
+from dotenv import load_dotenv
+import os
+from phi.model.groq import Groq
+from phi.tools.youtube_tools import YouTubeTools
+# Load variables from a local .env file (if one exists) into the process environment
+load_dotenv()
+# Read the API keys from the environment; os.getenv returns None for a key that is unset
+API_KEY = os.getenv("GOOGLE_API_KEY")
+groq_api_key = os.getenv("GROQ_API_KEY")
+if API_KEY:  # genai is configured only when a Google key was actually found
+    genai.configure(api_key=API_KEY)
+# Page configuration: browser-tab title, icon, and wide layout
+st.set_page_config(
+    page_title="Multimodal AI Applications",
+    page_icon="🌐",
+    layout="wide"
+)
+# Custom CSS for buttons, text inputs, and text areas
+def load_custom_css():
+    """Inject the app's <style> block into the page via st.markdown."""
+    custom_css = """
+        <style>
+            .stButton>button {
+                width: 100%;
+                height: 50px;
+                font-size: 18px;
+                font-weight: bold;
+                background: rgba(255, 255, 255, 0.2);
+                border-radius: 12px;
+                border: 2px solid rgba(255, 255, 255, 0.5);
+                box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.2);
+            }
+            .stTextInput>div>div>input, .stTextArea>div>textarea {
+                background: rgba(255, 255, 255, 0.1);
+                border-radius: 8px;
+                border: 1px solid rgba(255, 255, 255, 0.3);
+                color: white;
+                padding: 10px;
+            }
+        </style>
+        """
+    # unsafe_allow_html is required so the raw <style> tag is emitted instead of escaped
+    st.markdown(custom_css, unsafe_allow_html=True)
+load_custom_css()
+st.markdown("# 🎥 Video Transcription and AI Summary")  # page heading
+st.markdown("Upload a video or provide a YouTube link to get a transcription and AI-generated summary.")
+# Two tabs, one per application: uploaded-video analysis and YouTube-URL analysis
+tab1, tab2 = st.tabs(["📤 Video Upload", "🔗 YouTube Video Analysis"])
+# Tab 1: upload a local MP4 and analyze it with a Gemini-backed agent
+with tab1:
+    st.subheader("Phidata Video AI Summarizer Agent 🎥")
+    @st.cache_resource
+    def initialize_agent():
+        """Build the Gemini agent; st.cache_resource reuses the instance across reruns."""
+        return Agent(
+            name="Video AI Summarizer",
+            model=Gemini(id="gemini-2.0-flash-exp"),
+            tools=[DuckDuckGo()],
+            markdown=True,
+        )
+    multimodal_Agent = initialize_agent()
+    video_file = st.file_uploader("Upload a video file", type=['mp4'])
+    if video_file:
+        # Preview directly from the uploaded bytes. Streamlit re-executes this script on
+        # every widget interaction, so writing a delete=False temp file here would leave
+        # one orphaned file behind per rerun.
+        st.video(video_file, format="video/mp4", start_time=0)
+        user_query = st.text_area("What insights are you seeking from the video?", "")
+        # The key must differ from the analyze button in the YouTube tab: Streamlit
+        # raises a duplicate-key error when two widgets in one run share a key.
+        if st.button("🚀 Analyze Video", key="analyze_uploaded_video_button"):
+            if not user_query:
+                st.warning("Please enter a question or insight to analyze the video.")
+            else:
+                video_path = None  # set once the temp file exists, so cleanup knows what to remove
+                try:
+                    with st.spinner("Processing video..."):
+                        # upload_file is given a filesystem path, so the bytes are written to
+                        # disk only now, when an analysis was actually requested.
+                        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_video:
+                            # getvalue() returns the whole buffer regardless of read position
+                            temp_video.write(video_file.getvalue())
+                            video_path = temp_video.name
+                        processed_video = upload_file(video_path)
+                        # Poll until the file leaves the PROCESSING state
+                        while processed_video.state.name == "PROCESSING":
+                            time.sleep(1)
+                            processed_video = get_file(processed_video.name)
+                        # A FAILED file cannot be analyzed; surface it through the handler below
+                        if processed_video.state.name == "FAILED":
+                            raise ValueError("Video processing failed; please try a different file.")
+                        prompt = f"""
+                            Analyze the uploaded video and provide a summary.
+                            Respond to: {user_query}
+                        """
+                        response = multimodal_Agent.run(prompt, videos=[processed_video])
+                        st.subheader("Analysis Result")
+                        st.markdown(response.content)
+                except Exception as error:
+                    # UI boundary: show the failure to the user instead of crashing the app
+                    st.error(f"Error: {error}")
+                finally:
+                    # Always remove the local temp copy, whether the analysis succeeded or not
+                    if video_path is not None:
+                        Path(video_path).unlink(missing_ok=True)
+    else:
+        st.info("Upload a video file to begin analysis.")
+# Tab 2: analyze a YouTube video by URL with a Groq-backed agent
+with tab2:
+    st.subheader("YouTube Video Analyzer 🎬")
+    try:
+        youtube_agent = Agent(
+            model=Groq(id="llama3-8b-8192", api_key=groq_api_key),
+            tools=[YouTubeTools(), DuckDuckGo()],
+            show_tool_calls=True,
+            # NOTE(review): the next two flags look like YouTubeTools options rather than
+            # Agent fields — confirm where they belong before relying on them.
+            get_video_captions=True,
+            get_video_data=True,
+            description="Analyze YouTube videos for content, key points, and web research.",
+        )
+    except Exception as e:
+        # Nothing below can work without an agent, so report the error and halt this run
+        st.error(f"Error initializing the agent: {e}")
+        st.stop()
+    # Strip surrounding whitespace so a blank or padded entry is not treated as a URL
+    video_url = st.text_input("Enter YouTube Video URL:", "").strip()
+    user_query = st.text_area("Enter your question about the video (optional):", "")
+    # The key must differ from the analyze button in the upload tab: Streamlit raises
+    # a duplicate-key error when two widgets in one run share a key.
+    if st.button("✨ Analyze Video", key="analyze_youtube_video_button"):
+        if video_url:
+            with st.spinner("Analyzing..."):
+                try:
+                    # The optional question is included only when the user typed one
+                    prompt = f"""
+                        Analyze the YouTube video.
+                        Provide a detailed summary with key points.
+                        {f'Respond to: {user_query}' if user_query else ''}
+                        Video URL: {video_url}
+                    """
+                    output = youtube_agent.run(prompt)
+                    st.subheader("Analysis Result")
+                    st.markdown(output.content)
+                except Exception as e:
+                    # UI boundary: show the failure to the user instead of crashing the app
+                    st.error(f"Error: {e}")
+        else:
+            st.warning("Please enter a YouTube video URL.")