Spaces:
Sleeping
Sleeping
| import os | |
| import tempfile | |
| import gradio as gr | |
| import yt_dlp | |
| import whisper | |
| from groq import Groq | |
| # ---------------------------- | |
| # Global setup | |
| # ---------------------------- | |
# Whisper checkpoint to load. Defaults to "tiny" (a small model, suitable for
# HF Spaces); override it with the WHISPER_MODEL_NAME environment variable.
WHISPER_MODEL_NAME = os.getenv("WHISPER_MODEL_NAME", "tiny")
print(f"Loading Whisper model: {WHISPER_MODEL_NAME}")
# Loaded once at import time; transcribe_audio() reuses this instance.
whisper_model = whisper.load_model(WHISPER_MODEL_NAME)
# Lazy-init Groq client so a missing API key is reported in the UI
# instead of crashing the app at import time.
groq_client = None


def get_groq_client():
    """Return the shared Groq client, creating it on first use.

    The API key is read from the ``YS_API_KEY`` environment variable (the
    name this app has always used) and, if that is unset, from
    ``GROQ_API_KEY`` (the name the error message has always advertised).

    Returns:
        The module-level ``Groq`` client instance.

    Raises:
        gr.Error: If neither environment variable contains a key.
    """
    global groq_client
    if groq_client is not None:
        return groq_client

    # YS_API_KEY keeps priority so existing deployments behave exactly as before.
    api_key = os.environ.get("YS_API_KEY") or os.environ.get("GROQ_API_KEY")
    if not api_key:
        # Show in UI instead of crashing container
        raise gr.Error(
            "Neither YS_API_KEY nor GROQ_API_KEY environment variable is set. "
            "Set one in your Hugging Face Space settings under 'Variables and secrets'."
        )

    groq_client = Groq(api_key=api_key)
    return groq_client
| # ---------------------------- | |
| # Helper functions | |
| # ---------------------------- | |
def download_audio_from_youtube(youtube_url: str) -> str:
    """Download audio from a YouTube URL using yt-dlp and return the local file path.

    The audio is written into a fresh temporary directory (prefix
    ``yt_audio_``) and converted to MP3 by yt-dlp's FFmpeg post-processor.
    On success the directory is left in place because the caller still needs
    the file; on failure it is removed so failed requests do not leak disk
    space.

    NOTE: This requires the environment to have internet access and be allowed
    to reach YouTube. On many hosted environments (like some HF Spaces),
    this may fail due to network restrictions.

    Args:
        youtube_url: URL of the video whose audio should be fetched.

    Returns:
        Path to the downloaded ``<video id>.mp3`` file.

    Raises:
        RuntimeError: If the download or the MP3 conversion fails. The
            original exception is attached as ``__cause__``.
    """
    import shutil  # only needed for cleanup on the failure path

    tmp_dir = tempfile.mkdtemp(prefix="yt_audio_")
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": os.path.join(tmp_dir, "%(id)s.%(ext)s"),
        "quiet": True,
        "no_warnings": True,
        # A watch URL carrying a "list=" parameter must yield one video, not a
        # whole playlist; otherwise the "<id>.mp3" lookup below cannot succeed.
        "noplaylist": True,
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "128",
            }
        ],
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(youtube_url, download=True)
        # Guard against a missing info dict so the failure is reported as a
        # download problem rather than an AttributeError.
        video_id = (info_dict or {}).get("id")
        audio_path = os.path.join(tmp_dir, f"{video_id}.mp3")
        if not os.path.exists(audio_path):
            raise RuntimeError("Failed to download or convert audio from YouTube.")
        return audio_path
    except Exception as e:
        # Nothing usable was produced, so drop the temp directory.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        # Wrap the raw yt-dlp/network error in a user-friendly message
        raise RuntimeError(
            "Unable to download from YouTube. "
            "This environment probably has no internet access or blocks youtube.com. "
            "Try running locally or use the 'Upload file' tab instead.\n\n"
            f"Details: {e}"
        ) from e
def transcribe_audio(audio_path: str) -> str:
    """Run the shared Whisper model on ``audio_path`` and return its text.

    The transcription is requested in English and the resulting text is
    stripped of surrounding whitespace.

    Raises:
        RuntimeError: If Whisper yields no text for the file.
    """
    print(f"Transcribing audio: {audio_path}")
    whisper_output = whisper_model.transcribe(audio_path, language="en")
    text = whisper_output.get("text", "").strip()
    if text:
        return text
    raise RuntimeError("Transcription failed or produced empty text.")
def truncate_transcript(transcript: str, max_chars: int = 12000) -> str:
    """Return at most the first ``max_chars`` characters of ``transcript``.

    Keeps prompts from growing without bound. A transcript that already fits
    is returned unchanged, since slicing past the end is a no-op.
    """
    return transcript[:max_chars]
def _strip_code_fence(text: str) -> str:
    """Remove a surrounding Markdown code fence (``` or ```json) from ``text``."""
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return cleaned
    # Drop the backticks on both ends, then the optional "json" language tag.
    cleaned = cleaned.strip("`")
    if cleaned.lower().startswith("json"):
        cleaned = cleaned[4:]
    # Also trims the newline that sat just before the closing fence.
    return cleaned.strip()


def analyze_style_with_groq(transcript: str) -> str:
    """Call Groq to analyze the speaking style in the transcript.

    The transcript is truncated before being embedded in the prompt, and any
    Markdown code fence the model wraps around its answer is removed.

    Args:
        transcript: Transcript text whose style should be described.

    Returns:
        A JSON-style string describing the style. The string is not parsed or
        validated here.

    Raises:
        RuntimeError: If the model returns no content.
    """
    client = get_groq_client()
    transcript = truncate_transcript(transcript)
    prompt = f"""
You are an expert writing coach analyzing speaking and writing style.
Analyze ONLY the style (not the content) of the speaker in the transcript below.
Return a concise JSON object with the following keys:
- tone: overall tone (e.g., friendly, formal, humorous)
- pacing: sentence length, rhythm, speed of ideas
- vocabulary: complexity, jargon level, typical word choices
- structure: how the talk is organized (e.g., hook, 3 points, recap)
- persona: how the speaker presents themselves (e.g., mentor, friend, expert)
- rhetorical_devices: recurring devices (e.g., questions, stories, analogies)
- quirks: noticeable stylistic quirks
Only output valid JSON. Do not include any explanation outside the JSON.
Transcript:
{transcript}
"""
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {
                "role": "system",
                "content": "You analyze and describe writing and speaking style."
            },
            {"role": "user", "content": prompt},
        ],
        temperature=0.2,
        max_tokens=800,
    )
    # The message content may be None; treat that like an empty answer
    # instead of failing with AttributeError on .strip().
    raw_content = response.choices[0].message.content or ""
    # Some models wrap JSON in ```json ... ```; strip that if present
    style_json = _strip_code_fence(raw_content)
    if not style_json:
        raise RuntimeError("Style analysis failed or produced empty text.")
    return style_json
def generate_script_with_groq(style_profile_json: str,
                              topic: str,
                              audience: str,
                              length_hint: str) -> str:
    """Call Groq to generate a brand-new script matching the given style profile.

    Args:
        style_profile_json: Style description that is embedded verbatim in the
            prompt.
        topic: Subject of the new script.
        audience: Intended audience, inserted into the prompt as-is.
        length_hint: Human-readable target length (e.g. "about 8 to 10 minutes").

    Returns:
        The generated script text with surrounding whitespace removed.

    Raises:
        RuntimeError: If the model returns no content.
    """
    client = get_groq_client()
    prompt = f"""
You are a professional scriptwriter.
You are given a style profile as JSON and instructions for a new video script.
Your job is to write a COMPLETELY NEW script that matches the style,
but does NOT copy sentences or phrases from the original transcript.
STYLE PROFILE (JSON):
{style_profile_json}
INSTRUCTIONS:
- Topic: {topic}
- Target audience: {audience}
- Desired length: {length_hint} (approximate, in spoken minutes)
- Match the tone, pacing, structure, persona, and rhetorical devices implied by the style profile.
- Include:
- A strong hook/intro
- Clear body sections
- A closing that feels natural in this style (e.g., recap, call to action, reflection)
- Do NOT reference that this was generated by AI.
- Do NOT mention the original video or transcript.
- Do NOT include any JSON in your response.
Output only the final script text.
"""
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {
                "role": "system",
                "content": "You write engaging video scripts in a given style."
            },
            {"role": "user", "content": prompt},
        ],
        temperature=0.7,
        max_tokens=2000,
    )
    # The message content may be None; treat that like an empty answer
    # instead of failing with AttributeError on .strip().
    script = (response.choices[0].message.content or "").strip()
    if not script:
        raise RuntimeError("Script generation failed or produced empty text.")
    return script
def map_length_choice(length_choice: str) -> str:
    """Translate a UI length label into the phrase used in the prompt.

    Labels that are not recognised fall back to the medium-length phrase.
    """
    phrases_by_label = {
        "Short (~3–5 min)": "about 3 to 5 minutes",
        "Medium (~8–10 min)": "about 8 to 10 minutes",
        "Long (~15+ min)": "about 15 minutes or more",
    }
    try:
        return phrases_by_label[length_choice]
    except KeyError:
        return "about 8 to 10 minutes"
| # ---------------------------- | |
| # Pipelines | |
| # ---------------------------- | |
def pipeline_from_youtube(youtube_url: str,
                          new_topic: str,
                          target_audience: str,
                          length_choice: str) -> str:
    """End-to-end pipeline from YouTube URL.

    Downloads the audio, transcribes it, extracts a style profile and
    generates a new script on ``new_topic``. The temporary download directory
    is removed once the pipeline finishes, whether it succeeded or not.

    This will NOT work if the environment cannot reach youtube.com.

    Args:
        youtube_url: URL of the source video.
        new_topic: Topic of the script to generate.
        target_audience: Optional audience; "general audience" when empty.
        length_choice: One of the length labels offered by the UI.

    Returns:
        The generated script text.

    Raises:
        gr.Error: If a required input is missing or any pipeline step fails.
    """
    import shutil  # only needed to clean up the downloaded audio

    # Tolerate None (e.g. from API clients) so the user still gets the
    # friendly validation message rather than an AttributeError.
    if not (youtube_url or "").strip():
        raise gr.Error("Please enter a YouTube URL.")
    if not (new_topic or "").strip():
        raise gr.Error("Please enter a new topic.")
    length_hint = map_length_choice(length_choice)
    audio_path = None
    try:
        audio_path = download_audio_from_youtube(youtube_url.strip())
        transcript = transcribe_audio(audio_path)
        style_profile_json = analyze_style_with_groq(transcript)
        return generate_script_with_groq(
            style_profile_json,
            topic=new_topic,
            audience=target_audience or "general audience",
            length_hint=length_hint,
        )
    except Exception as e:
        raise gr.Error(f"Error in pipeline: {e}") from e
    finally:
        # The downloader stores each file in its own "yt_audio_*" temp
        # directory; delete it so repeated requests do not fill the disk.
        # The prefix check ensures we never remove an unrelated directory.
        if audio_path:
            download_dir = os.path.dirname(audio_path)
            if os.path.basename(download_dir).startswith("yt_audio_"):
                shutil.rmtree(download_dir, ignore_errors=True)
def pipeline_from_file(audio_file: str,
                       new_topic: str,
                       target_audience: str,
                       length_choice: str) -> str:
    """End-to-end pipeline from uploaded file (audio or video).

    Transcribes the uploaded file, extracts a style profile and generates a
    new script on ``new_topic``.

    This works fine even if YouTube is blocked.

    Args:
        audio_file: Filesystem path of the uploaded file.
        new_topic: Topic of the script to generate.
        target_audience: Optional audience; "general audience" when empty.
        length_choice: One of the length labels offered by the UI.

    Returns:
        The generated script text.

    Raises:
        gr.Error: If a required input is missing or any pipeline step fails.
    """
    if not audio_file:
        raise gr.Error("Please upload an audio or video file.")
    # Tolerate None (e.g. from API clients) so the user still gets the
    # friendly validation message rather than an AttributeError.
    if not (new_topic or "").strip():
        raise gr.Error("Please enter a new topic.")
    length_hint = map_length_choice(length_choice)
    try:
        transcript = transcribe_audio(audio_file)
        style_profile_json = analyze_style_with_groq(transcript)
        return generate_script_with_groq(
            style_profile_json,
            topic=new_topic,
            audience=target_audience or "general audience",
            length_hint=length_hint,
        )
    except Exception as e:
        raise gr.Error(f"Error in pipeline: {e}") from e
| # ---------------------------- | |
| # Gradio UI | |
| # ---------------------------- | |
# Length labels offered by both tabs; map_length_choice() turns them into prompt text.
_LENGTH_CHOICES = ("Short (~3–5 min)", "Medium (~8–10 min)", "Long (~15+ min)")


def _script_option_inputs():
    """Create the topic, audience and length widgets that both tabs share.

    Must be called inside an active Gradio layout context; the components are
    created in the order topic, audience, length.
    """
    topic_box = gr.Textbox(
        label="New Topic",
        placeholder="e.g., How to stay productive while working from home",
    )
    audience_box = gr.Textbox(
        label="Target Audience (optional)",
        placeholder="e.g., beginners, developers, students, content creators",
    )
    length_radio = gr.Radio(
        label="Desired Script Length",
        choices=list(_LENGTH_CHOICES),
        value="Medium (~8–10 min)",
    )
    return topic_box, audience_box, length_radio


with gr.Blocks() as demo:
    gr.Markdown(
        """
# YouTube Style → New Script Generator
⚠️ **Note for Hugging Face Spaces:**
Direct YouTube download may fail if this Space has no internet access
or if youtube.com is blocked.
In that case, use the **“Upload file”** tab instead.
## How it works
1. We transcribe audio with Whisper.
2. Groq analyzes style.
3. Groq writes a brand-new script in the same style, on your topic.
"""
    )

    # Tab 1: fetch the audio from a YouTube URL.
    with gr.Tab("From YouTube URL"):
        youtube_url = gr.Textbox(
            label="YouTube URL",
            placeholder="https://www.youtube.com/watch?v=...",
        )
        new_topic_y, target_audience_y, length_choice_y = _script_option_inputs()
        generate_button_y = gr.Button("Generate from YouTube URL")
        output_script_y = gr.Textbox(label="Generated Script", lines=25)
        generate_button_y.click(
            fn=pipeline_from_youtube,
            inputs=[youtube_url, new_topic_y, target_audience_y, length_choice_y],
            outputs=output_script_y,
        )

    # Tab 2: use a file uploaded by the user.
    with gr.Tab("Upload file (recommended for Spaces)"):
        audio_file = gr.Audio(
            label="Upload audio or video file",
            type="filepath",
            sources=["upload"],
        )
        new_topic_f, target_audience_f, length_choice_f = _script_option_inputs()
        generate_button_f = gr.Button("Generate from Uploaded File")
        output_script_f = gr.Textbox(label="Generated Script", lines=25)
        generate_button_f.click(
            fn=pipeline_from_file,
            inputs=[audio_file, new_topic_f, target_audience_f, length_choice_f],
            outputs=output_script_f,
        )

if __name__ == "__main__":
    demo.launch()