Spaces:

afzalsherazi
/

YoutubeToScript

Sleeping

App Files Files Community

afzalsherazi committed on Feb 17

Commit

18bfe11

verified ·

1 Parent(s): 4ec1da3

Update app.py

Browse files

Files changed (1) hide show

app.py +231 -163

app.py CHANGED Viewed

@@ -1,213 +1,281 @@
 import os
-import datetime
 import gradio as gr
-import matplotlib.pyplot as plt
 from groq import Groq
-# -------------------------
-# Groq client configuration
-# -------------------------
# The API key is read from the YS_API_KEY environment variable; when it is
# absent no client is built and `client` stays None.
GROQ_API_KEY = os.environ.get("YS_API_KEY")
client = None
if GROQ_API_KEY:
    client = Groq(api_key=GROQ_API_KEY)

# Chat model requested for every completion.
MODEL_NAME = "llama-3.3-70b-versatile"

# System prompt sent ahead of every user turn.
SYSTEM_PROMPT = """
You are a warm, non-judgmental mental wellness check-in companion.
You are NOT a therapist, doctor, or emergency service.
Your goals:
- Help the user briefly name and understand how they feel.
- Offer simple, low-risk wellness ideas (breathing exercises, journaling prompts, tiny self-care actions).
- Encourage seeking professional help for serious or ongoing struggles.
Behavior guidelines:
- The user will provide a mood from 1 (very low) to 5 (great), plus some text.
- If mood is 4–5: focus on reflection, gratitude, and reinforcing what’s going well.
- If mood is 2–3: focus on stress relief, small coping steps, and self-kindness.
- If mood is 1: be extra gentle; normalize struggle; suggest reaching out to someone they trust or a professional; offer one very small, doable exercise.
- Always keep responses concise: 3–6 short sentences.
- Do NOT diagnose or label mental disorders.
- Do NOT present yourself as a therapist, clinician, or medical professional.
- Do NOT give medical or crisis instructions beyond encouraging them to contact real services.
- If the user mentions suicide, self-harm, or harming others, you must encourage them to contact local emergency services or crisis hotlines immediately.
Respond in plain English text only, no markdown.
"""

# Lower-case phrases; a message containing any of them gets CRISIS_MESSAGE
# instead of a model-generated reply.
CRISIS_KEYWORDS = [
    "suicide",
    "kill myself",
    "end my life",
    "end it all",
    "hurt myself",
    "self-harm",
    "self harm",
    "harm myself",
]

# Fixed reply used whenever a crisis phrase is detected.
CRISIS_MESSAGE = (
    "I'm really glad you reached out and shared this.\n\n"
    "I’m not an emergency service or a professional, "
    "and I can’t keep you safe in a crisis. "
    "If you are in immediate danger or thinking about hurting yourself "
    "or someone else, please contact your local emergency number or a "
    "crisis hotline right now, or reach out to a trusted person near you "
    "(friend, family member, or professional)."
)
-# -------------------------
-# LLM call via Groq
-# -------------------------
def generate_bot_reply(user_message: str, mood_score: int) -> str:
    """Ask the Groq chat model for a short wellness reply.

    Args:
        user_message: Free text the user typed.
        mood_score: Mood rating; converted with ``int()`` before it is
            embedded in the prompt.

    Returns:
        The model's reply with surrounding whitespace removed, or a fixed
        fallback sentence when the client is not configured, the API call
        fails, or the model returns no text.
    """
    if client is None or not GROQ_API_KEY:
        # The key is loaded from YS_API_KEY, so that is the variable the
        # user actually has to set.
        return (
            "The language model is not configured yet. "
            "Please set the YS_API_KEY environment variable."
        )

    fallback = "I'm having trouble responding right now. Please try again in a moment."
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {
            "role": "user",
            "content": f"Mood (1-5): {int(mood_score)}\nUser message: {user_message}",
        },
    ]
    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            temperature=0.7,
            max_tokens=300,
            top_p=0.9,
        )
        # `content` may be None; treat that like an empty reply instead of
        # relying on the AttributeError being swallowed below.
        reply = (completion.choices[0].message.content or "").strip()
    except Exception as e:
        # Best-effort boundary: the chat UI must always get some reply.
        print("Groq API error:", e)
        return fallback
    return reply or fallback
-# -------------------------
-# Plotting mood history
-# -------------------------
def build_mood_plot(mood_history):
    """Draw the session's mood scores as a line chart.

    Args:
        mood_history: list of dicts like {"date": "YYYY-MM-DD", "mood": int},
            in the order the entries were recorded.

    Returns:
        A matplotlib figure, or None when there is no history to plot.
    """
    if not mood_history:
        return None

    dates = [entry["date"] for entry in mood_history]
    scores = [entry["mood"] for entry in mood_history]
    # Plot against the entry index: using the date strings as x values makes
    # the axis categorical, which stacks every same-day entry on one x position.
    positions = list(range(len(mood_history)))

    fig, ax = plt.subplots()
    ax.plot(positions, scores, marker="o")
    ax.set_xticks(positions)
    ax.set_xticklabels(dates)
    ax.set_ylim(1, 5)
    ax.set_ylabel("Mood (1 = very low, 5 = great)")
    ax.set_xlabel("Date")
    ax.set_title("Mood over time (this session)")
    ax.grid(True)
    fig.autofmt_xdate(rotation=45)
    # Unregister the figure from pyplot so repeated calls do not accumulate
    # open figures; the Figure object itself is still returned to the caller.
    plt.close(fig)
    return fig
-# -------------------------
-# Chat logic
-# -------------------------
def chat(user_message, mood_score, chat_history, mood_history):
    """Handle one "Send" click.

    Records the slider value under today's date, chooses a reply (crisis
    message, nudge for empty input, or model reply), extends the
    conversation, and rebuilds the mood plot.

    Returns:
        (chat_history, chat_history, figure, mood_history, "") -- the
        trailing empty string clears the input box.
    """
    # Missing state starts out as empty lists.
    chat_history = [] if chat_history is None else chat_history
    mood_history = [] if mood_history is None else mood_history

    text = (user_message or "").strip()
    has_text = text != ""

    # One mood entry per send, stamped with today's ISO date.
    mood_history.append(
        {"date": datetime.date.today().isoformat(), "mood": int(mood_score)}
    )

    lowered = text.lower()
    if any(keyword in lowered for keyword in CRISIS_KEYWORDS):
        bot_reply = CRISIS_MESSAGE
    elif has_text:
        bot_reply = generate_bot_reply(text, mood_score)
    else:
        bot_reply = "If you'd like, share a sentence or two about how you're feeling right now."

    # The user turn is only recorded when something was actually typed.
    if has_text:
        chat_history.append({"role": "user", "content": text})
    chat_history.append({"role": "assistant", "content": bot_reply})

    figure = build_mood_plot(mood_history)
    return chat_history, chat_history, figure, mood_history, ""
-# -------------------------
 # Gradio UI
-# -------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# 🧠 Mental Wellness Check-in Bot")
     gr.Markdown(
-        "This is a simple mental wellness check-in companion. "
-        "**It is not a therapist or medical service.**\n\n"
-        "If you are in crisis or thinking about harming yourself or others, "
-        "please contact your local emergency number or a crisis hotline immediately."
     )
-    initial_bot_msg = (
-        "Hi, I’m your wellness check-in buddy. "
-        "Use the slider to rate your mood and share a bit about how you’re feeling to begin."
-    )
-    # IMPORTANT: use messages format: list of {"role": ..., "content": ...}
-    chatbot = gr.Chatbot(
-        value=[{"role": "assistant", "content": initial_bot_msg}],
-        height=400,
-        label="Conversation",
     )
-    with gr.Row():
-        mood_slider = gr.Slider(
-            minimum=1,
-            maximum=5,
-            value=3,
-            step=1,
-            label="How are you feeling today? (1 = very low, 5 = great)",
-        )
-    user_input = gr.Textbox(
-        label="What’s on your mind?",
-        placeholder="Type a sentence or two about how you feel...",
-        lines=3,
     )
-    send_btn = gr.Button("Send")
-    mood_plot = gr.Plot(label="Mood trend (this session)")
-    # State for chat history and mood history
-    chat_state = gr.State([{"role": "assistant", "content": initial_bot_msg}])
-    mood_state = gr.State([])
-    send_btn.click(
-        fn=chat,
-        inputs=[user_input, mood_slider, chat_state, mood_state],
-        outputs=[chatbot, chat_state, mood_plot, mood_state, user_input],
     )
 if __name__ == "__main__":
-    # For local / Colab testing
     demo.launch()

 import os
+import tempfile
 import gradio as gr
+import yt_dlp
+import whisper
 from groq import Groq
+# ----------------------------
+# Global setup
+# ----------------------------
# Whisper checkpoint name comes from WHISPER_MODEL_NAME and defaults to
# "tiny" (smaller = faster on CPU; "tiny" or "base" are good for Spaces).
WHISPER_MODEL_NAME = os.getenv("WHISPER_MODEL_NAME", "tiny")
print("Loading Whisper model: {}".format(WHISPER_MODEL_NAME))
whisper_model = whisper.load_model(WHISPER_MODEL_NAME)

# Groq client: startup is aborted when no API key is configured.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if GROQ_API_KEY:
    groq_client = Groq(api_key=GROQ_API_KEY)
else:
    raise RuntimeError("GROQ_API_KEY environment variable is not set.")
+# ----------------------------
+# Helper functions
+# ----------------------------
def download_audio_from_youtube(youtube_url: str) -> str:
    """Download a video's audio track as MP3 and return the local file path.

    The file is written as ``<video id>.mp3`` inside a fresh temporary
    directory whose name starts with ``yt_audio_``. On success the caller
    owns that directory and is responsible for deleting it; on failure it is
    removed here before the exception propagates.

    Demo only: use only on content you have rights to.

    Args:
        youtube_url: URL of the video to fetch.

    Returns:
        Path of the converted MP3 file.

    Raises:
        RuntimeError: If the expected MP3 file is missing after download.
        Exception: Any error raised by yt-dlp is re-raised unchanged.
    """
    import shutil  # local import: only needed for failure cleanup

    tmp_dir = tempfile.mkdtemp(prefix="yt_audio_")
    output_template = os.path.join(tmp_dir, "%(id)s.%(ext)s")
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": output_template,
        "quiet": True,
        "no_warnings": True,
        # A watch URL that also carries a playlist id would otherwise pull
        # the whole playlist, and the "<id>.mp3" lookup below would miss.
        "noplaylist": True,
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "128",
            }
        ],
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(youtube_url, download=True)
        video_id = info_dict.get("id")
        audio_path = os.path.join(tmp_dir, f"{video_id}.mp3")
        if not os.path.exists(audio_path):
            raise RuntimeError("Failed to download or convert audio from YouTube.")
    except Exception:
        # Do not leave a half-filled temp directory behind on failure.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise
    return audio_path
def transcribe_audio(audio_path: str) -> str:
    """Run the module-level Whisper model on *audio_path* and return the text.

    The transcription is requested with ``language="en"``.

    Raises:
        RuntimeError: If the resulting text is empty after stripping.
    """
    print(f"Transcribing audio: {audio_path}")
    whisper_output = whisper_model.transcribe(audio_path, language="en")
    text = whisper_output.get("text", "").strip()
    if text:
        return text
    raise RuntimeError("Transcription failed or produced empty text.")
def truncate_transcript(transcript: str, max_chars: int = 12000) -> str:
    """Limit *transcript* to at most *max_chars* characters.

    Transcripts that already fit are returned unchanged. When the cut would
    land in the middle of a word, the partial word is dropped so the result
    ends on a complete word; text with no whitespace before the cut is
    hard-cut at *max_chars*.

    Args:
        transcript: Full transcript text.
        max_chars: Maximum number of characters to keep (must be >= 0).

    Returns:
        The possibly shortened transcript, never longer than *max_chars*.

    Raises:
        ValueError: If *max_chars* is negative (a negative slice bound would
            silently trim from the end instead of limiting the length).
    """
    if max_chars < 0:
        raise ValueError("max_chars must be non-negative.")
    if len(transcript) <= max_chars:
        return transcript

    clipped = transcript[:max_chars]
    # The cut is mid-word when both the last kept character and the first
    # dropped character are non-whitespace.
    cut_mid_word = (
        bool(clipped)
        and not clipped[-1].isspace()
        and not transcript[max_chars].isspace()
    )
    if cut_mid_word:
        pieces = clipped.rsplit(None, 1)
        if len(pieces) == 2:
            clipped = pieces[0]
    return clipped
def analyze_style_with_groq(transcript: str) -> str:
    """Ask Groq to describe the speaking style found in *transcript*.

    The transcript is first shortened with ``truncate_transcript`` and then
    embedded in a prompt that requests a JSON object with the keys tone,
    pacing, vocabulary, structure, persona, rhetorical_devices and quirks.

    Args:
        transcript: Transcript text of the source video.

    Returns:
        The model's answer as a JSON-style string, with any surrounding
        Markdown code fence and whitespace removed. The string is not
        parsed or validated here.

    Raises:
        RuntimeError: If the model returns no text.
    """
    transcript = truncate_transcript(transcript)
    prompt = f"""
You are an expert writing coach analyzing speaking and writing style.
Analyze ONLY the style (not the content) of the speaker in the transcript below.
Return a concise JSON object with the following keys:
- tone: overall tone (e.g., friendly, formal, humorous)
- pacing: sentence length, rhythm, speed of ideas
- vocabulary: complexity, jargon level, typical word choices
- structure: how the talk is organized (e.g., hook, 3 points, recap)
- persona: how the speaker presents themselves (e.g., mentor, friend, expert)
- rhetorical_devices: recurring devices (e.g., questions, stories, analogies)
- quirks: noticeable stylistic quirks
Only output valid JSON. Do not include any explanation outside the JSON.
Transcript:
{transcript}
"""
    response = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {
                "role": "system",
                "content": "You analyze and describe writing and speaking style."
            },
            {"role": "user", "content": prompt},
        ],
        temperature=0.2,
        max_tokens=800,
    )
    # `content` can be None; fail with a clear message rather than an
    # AttributeError on `.strip()`.
    raw_reply = response.choices[0].message.content
    if not raw_reply or not raw_reply.strip():
        raise RuntimeError("Style analysis returned an empty response.")

    style_json = raw_reply.strip()
    # Some models wrap JSON in ```json ... ```; strip that if present,
    # including the newlines left next to the fence markers.
    if style_json.startswith("```"):
        style_json = style_json.strip("`").strip()
        # After stripping backticks, there might be a "json" language tag.
        if style_json.lower().startswith("json"):
            style_json = style_json[4:].strip()
    return style_json
def generate_script_with_groq(style_profile_json: str,
                              topic: str,
                              audience: str,
                              length_hint: str) -> str:
    """Ask Groq for a brand-new script written in the given style.

    Args:
        style_profile_json: Style description (JSON text) inserted into the
            prompt verbatim.
        topic: Subject of the new script.
        audience: Who the script is written for.
        length_hint: Human-readable target length, e.g. "about 8 to 10 minutes".

    Returns:
        The generated script text with surrounding whitespace removed.

    Raises:
        RuntimeError: If the model returns no text.
    """
    prompt = f"""
You are a professional scriptwriter.
You are given a style profile as JSON and instructions for a new video script.
Your job is to write a COMPLETELY NEW script that matches the style,
but does NOT copy sentences or phrases from the original transcript.
STYLE PROFILE (JSON):
{style_profile_json}
INSTRUCTIONS:
- Topic: {topic}
- Target audience: {audience}
- Desired length: {length_hint} (approximate, in spoken minutes)
- Match the tone, pacing, structure, persona, and rhetorical devices implied by the style profile.
- Include:
  - A strong hook/intro
  - Clear body sections
  - A closing that feels natural in this style (e.g., recap, call to action, reflection)
- Do NOT reference that this was generated by AI.
- Do NOT mention the original video or transcript.
- Do NOT include any JSON in your response.
Output only the final script text.
"""
    response = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {
                "role": "system",
                "content": "You write engaging video scripts in a given style."
            },
            {"role": "user", "content": prompt},
        ],
        temperature=0.7,
        max_tokens=2000,
    )
    # `content` can be None; surface that (and a blank reply) as a clear
    # error instead of an AttributeError or a silently empty script box.
    script = (response.choices[0].message.content or "").strip()
    if not script:
        raise RuntimeError("Script generation returned an empty response.")
    return script
def full_pipeline(youtube_url: str,
                  new_topic: str,
                  target_audience: str,
                  length_choice: str) -> str:
    """Run the end-to-end pipeline behind the "Generate New Script" button.

    Steps:
    - Download YouTube audio
    - Transcribe with Whisper
    - Analyze style with Groq
    - Generate new script with Groq

    Args:
        youtube_url: Video URL typed by the user.
        new_topic: Topic for the new script.
        target_audience: Optional audience; blank falls back to
            "general audience".
        length_choice: One of the radio labels; unknown values fall back to
            the medium length.

    Returns:
        The generated script text.

    Raises:
        gr.Error: On missing input or when any pipeline step fails.
    """
    import shutil  # local import: only needed for temp-file cleanup

    # Text inputs may arrive as None; normalise before validating.
    youtube_url = (youtube_url or "").strip()
    new_topic = (new_topic or "").strip()
    if not youtube_url:
        raise gr.Error("Please enter a YouTube URL.")
    if not new_topic:
        raise gr.Error("Please enter a new topic.")

    length_map = {
        "Short (~3–5 min)": "about 3 to 5 minutes",
        "Medium (~8–10 min)": "about 8 to 10 minutes",
        "Long (~15+ min)": "about 15 minutes or more",
    }
    length_hint = length_map.get(length_choice, "about 8 to 10 minutes")
    audience = (target_audience or "").strip() or "general audience"

    audio_path = None
    try:
        audio_path = download_audio_from_youtube(youtube_url)
        transcript = transcribe_audio(audio_path)
        style_profile_json = analyze_style_with_groq(transcript)
        return generate_script_with_groq(
            style_profile_json,
            topic=new_topic,
            audience=audience,
            length_hint=length_hint,
        )
    except gr.Error:
        # Already user-facing; do not wrap it a second time.
        raise
    except Exception as e:
        # Keep the original exception chained for the server log.
        raise gr.Error(f"Error in pipeline: {e}") from e
    finally:
        # download_audio_from_youtube places the file in its own
        # "yt_audio_*" temp directory; remove it so requests do not pile up
        # audio files on disk. The prefix check avoids deleting anything else.
        if audio_path:
            audio_dir = os.path.dirname(audio_path)
            if os.path.basename(audio_dir).startswith("yt_audio_"):
                shutil.rmtree(audio_dir, ignore_errors=True)
+# ----------------------------
 # Gradio UI
+# ----------------------------
 with gr.Blocks() as demo:
     gr.Markdown(
+        """
+        # YouTube Style → New Script Generator
+        1. Paste a YouTube URL (use content you own or have rights to).
+        2. Enter a new topic and audience.
+        3. The app:
+           - extracts audio
+           - transcribes with Whisper
+           - analyzes style with Groq
+           - writes a brand-new script in that style on your topic.
+        """
     )
+    with gr.Row():
+        youtube_url = gr.Textbox(
+            label="YouTube URL",
+            placeholder="https://www.youtube.com/watch?v=...",
+        )
+    new_topic = gr.Textbox(
+        label="New Topic",
+        placeholder="e.g., How to stay productive while working from home",
     )
+    target_audience = gr.Textbox(
+        label="Target Audience (optional)",
+        placeholder="e.g., beginners, developers, students, content creators",
+    )
+    length_choice = gr.Radio(
+        label="Desired Script Length",
+        choices=["Short (~3–5 min)", "Medium (~8–10 min)", "Long (~15+ min)"],
+        value="Medium (~8–10 min)",
     )
+    generate_button = gr.Button("Generate New Script")
+    output_script = gr.Textbox(
+        label="Generated Script",
+        lines=25,
+    )
+    generate_button.click(
+        fn=full_pipeline,
+        inputs=[youtube_url, new_topic, target_audience, length_choice],
+        outputs=output_script,
     )
+# For Hugging Face Spaces, this is fine; they run `python app.py`
 if __name__ == "__main__":
     demo.launch()