# Hugging Face Space: build-small-hackathon/VoiceJournalReflection
# Status: Sleeping
# File size: 5,674 Bytes

import gradio as gr
import whisper
import torch
from transformers import pipeline
import warnings
import spaces
warnings.filterwarnings("ignore")



@spaces.GPU
def transcribe(audio_path: str) -> str:
    """Transcribe a recorded audio file to text with the Whisper ``tiny`` model.

    The model is loaded on each call and released again before returning, so
    nothing stays resident between requests.

    Args:
        audio_path: Filesystem path of the recording. ``None`` or an empty
            string means nothing was recorded.

    Returns:
        The transcribed text with surrounding whitespace removed, or ``""``
        when no audio path was supplied.
    """
    if not audio_path:
        return ""
    print("Loading Whisper inside GPU...")
    model = whisper.load_model("tiny")
    try:
        result = model.transcribe(audio_path)
    finally:
        # Release the model even when transcription raises, so a failed
        # request does not leave its weights cached on the device.
        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    return result["text"].strip()


@spaces.GPU
def reflect(transcript: str) -> str:
    """Generate a short, three-part reflection on a journal transcript.

    A ``Qwen/Qwen2.5-0.5B-Instruct`` text-generation pipeline is built on each
    call, prompted with the transcript, and released again before returning.

    Args:
        transcript: The transcribed journal entry.

    Returns:
        The model's reply with surrounding whitespace removed, or the string
        ``"No transcript..."`` when the transcript is empty or only whitespace.
    """
    if not transcript or not transcript.strip():
        return "No transcript..."

    print("Loading Qwen inside GPU...")
    generator = pipeline(
        "text-generation",
        model="Qwen/Qwen2.5-0.5B-Instruct",
        device_map="auto",
        # Half precision only when a CUDA device is present; full precision otherwise.
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    )
    try:
        prompt = f"""You are a warm, thoughtful journaling companion. Your tone is human and gentle, never clinical or robotic.

The user just shared this voice journal entry:

"{transcript}"

Respond with exactly three parts, clearly separated:

**Mood check:** A 2-sentence summary of the emotional tone you picked up.

**What I noticed:** One pattern, theme, or detail that stood out in what they shared.

**Something to sit with:** One gentle, open question for them to reflect on later.

Keep the full response under 160 words. Be kind, specific, and real."""

        messages = [
            {"role": "system", "content": "You are a warm journaling companion. Be human, brief, and specific."},
            {"role": "user", "content": prompt},
        ]

        output = generator(
            messages,
            max_new_tokens=220,
            do_sample=True,
            temperature=0.7,
            pad_token_id=generator.tokenizer.eos_token_id,
        )

        # Extract the assistant's reply from the chat template output
        generated = output[0]["generated_text"]
        if isinstance(generated, list):
            # Chat format: last message is the assistant reply
            reply = generated[-1]["content"]
        else:
            # Fallback: strip the prompt
            reply = generated[len(prompt):]
    finally:
        # Release the pipeline even when generation raises, so a failed
        # request does not leave the model cached on the device.
        del generator
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    return reply.strip()


def process_entry(audio_path):
    """Turn a recording into a ``(transcript, reflection)`` pair.

    When there is no recording, or no speech could be transcribed, the
    transcript slot is empty and the reflection slot carries a hint for the
    user instead.
    """
    # Nothing recorded yet: skip both models entirely.
    if audio_path is None:
        return "", "Please record something first, then click Reflect."

    spoken_text = transcribe(audio_path)
    if spoken_text:
        return spoken_text, reflect(spoken_text)

    # Transcription produced no text.
    return "", "Couldn't make out any speech. Try speaking a bit louder or closer to the mic."



# Intro markdown rendered at the top of the page.
# NOTE(review): the text promises that everything runs locally, while the
# inference functions in this file are decorated with @spaces.GPU — confirm the
# wording is accurate for a hosted deployment.
DESCRIPTION = """
## Voice Journal
*Speak your day. Get it reflected back.*

Record a voice note about anything — how your day went, what's on your mind, 
something that happened. A small AI model will listen and gently reflect it back to you.

Everything runs locally. Nothing leaves your machine.
"""

# Footer markdown rendered at the bottom of the page. The model names here
# should match what the file actually loads: Whisper "tiny" for transcription
# and Qwen/Qwen2.5-0.5B-Instruct for the reflection.
FOOTER = """
---
*Built for the [Build Small Hackathon](https://huggingface.co/build-small-hackathon) · 
Whisper tiny (39M) + Qwen2.5-0.5B-Instruct · No cloud APIs · Runs on your laptop*
"""

# Stylesheet passed to gr.Blocks(css=...). The id selectors correspond to the
# elem_id values assigned in the layout: title-block, record-col, reflect-btn,
# transcript-box and reflection-box.
# NOTE(review): the final `footer` rule presumably hides Gradio's built-in page
# footer rather than the FOOTER markdown — confirm in the rendered page.
custom_css = """
body { font-family: 'Georgia', serif; }

#title-block { text-align: center; padding: 1.5rem 0 0.5rem; }

#record-col { display: flex; flex-direction: column; gap: 0.75rem; }

#reflect-btn {
    background: #2d4a3e !important;
    color: #f0ebe0 !important;
    border: none !important;
    border-radius: 8px !important;
    font-size: 1rem !important;
    padding: 0.75rem !important;
    cursor: pointer;
}

#reflect-btn:hover { background: #1f3429 !important; }

#transcript-box textarea, #reflection-box textarea {
    font-family: 'Georgia', serif !important;
    font-size: 0.95rem !important;
    line-height: 1.65 !important;
    background: #faf8f3 !important;
    border: 1px solid #d8d0c0 !important;
    border-radius: 8px !important;
}

#reflection-box textarea {
    background: #f0ede4 !important;
    color: #2a2a2a !important;
}

footer { display: none !important; }
"""

# Build the page: intro text, a two-column row (recorder on the left, results
# on the right), the button wiring, and the footer. The Blocks object is bound
# to `app` so the entry point can launch it.
with gr.Blocks(
    title="Voice Journal",
    theme=gr.themes.Soft(
        primary_hue="emerald",
        neutral_hue="stone",
        font=gr.themes.GoogleFont("Lora"),
    ),
    css=custom_css,
) as app:

    # Intro markdown; the elem_id ties it to the #title-block rule in custom_css.
    with gr.Column(elem_id="title-block"):
        gr.Markdown(DESCRIPTION)

    with gr.Row(equal_height=False):
        # Left column — input
        with gr.Column(scale=1, elem_id="record-col"):
            # Microphone-only recorder; type="filepath" matches process_entry,
            # which forwards its argument to transcribe() as an audio path.
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="Record your entry",
                show_download_button=False,
            )
            reflect_btn = gr.Button("Reflect →", elem_id="reflect-btn", variant="primary")

        # Right column — output
        with gr.Column(scale=1):
            # Read-only box for the first value returned by process_entry.
            transcript_box = gr.Textbox(
                label="What you said",
                lines=5,
                interactive=False,
                placeholder="Your words will appear here after you click Reflect…",
                elem_id="transcript-box",
            )
            # Read-only box for the second value returned by process_entry.
            reflection_box = gr.Textbox(
                label="Reflection",
                lines=8,
                interactive=False,
                placeholder="Your reflection will appear here…",
                elem_id="reflection-box",
            )

    # One click runs the whole audio -> transcript -> reflection pipeline and
    # fills both boxes from the returned pair.
    # NOTE(review): api_name=False presumably keeps this handler out of the
    # auto-generated API — confirm against the Gradio docs.
    reflect_btn.click(
        fn=process_entry,
        inputs=audio_input,
        outputs=[transcript_box, reflection_box],
        api_name=False,
    )

    gr.Markdown(FOOTER)

# Start the app only when this file is executed directly; share=False is
# passed explicitly to launch().
if __name__ == "__main__":
    app.launch(share=False)