| import gradio as gr |
| import whisper |
| import torch |
| from transformers import pipeline |
| import warnings |
| import spaces |
| warnings.filterwarnings("ignore") |
|
|
|
|
|
|
@spaces.GPU
def transcribe(audio_path: str) -> str:
    """Transcribe a recorded audio file to text with the Whisper "tiny" model.

    The model is loaded on every call, inside the ``spaces.GPU``-decorated
    function, and released again before the function returns.

    Args:
        audio_path: Path to the audio file. ``None`` (nothing recorded) is
            accepted and short-circuits to an empty result.

    Returns:
        The transcribed text with surrounding whitespace stripped, or ``""``
        when ``audio_path`` is ``None``.
    """
    if audio_path is None:
        return ""

    print("Loading Whisper inside GPU...")
    model = whisper.load_model("tiny")
    try:
        result = model.transcribe(audio_path)
    finally:
        # Drop the model and clear torch's CUDA cache even when transcription
        # raises, so a failed request does not leave the weights allocated.
        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    return result["text"].strip()
|
|
|
|
@spaces.GPU
def reflect(transcript: str) -> str:
    """Generate a short, gentle reflection on a journal transcript.

    A ``text-generation`` pipeline for ``Qwen/Qwen2.5-0.5B-Instruct`` is built
    on every call, inside the ``spaces.GPU``-decorated function, and released
    before the function returns. Generation samples (``do_sample=True``,
    ``temperature=0.7``), so repeated calls may give different text.

    Args:
        transcript: The transcribed journal entry.

    Returns:
        The model's reply with surrounding whitespace stripped, or the
        placeholder ``"No transcript..."`` when ``transcript`` is empty.
    """
    if not transcript:
        return "No transcript..."

    print("Loading Qwen inside GPU...")
    generator = pipeline(
        "text-generation",
        model="Qwen/Qwen2.5-0.5B-Instruct",
        device_map="auto",
        # Half precision only when CUDA is available; full precision otherwise.
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    )
    try:
        prompt = f"""You are a warm, thoughtful journaling companion. Your tone is human and gentle, never clinical or robotic.

The user just shared this voice journal entry:

"{transcript}"

Respond with exactly three parts, clearly separated:

**Mood check:** A 2-sentence summary of the emotional tone you picked up.

**What I noticed:** One pattern, theme, or detail that stood out in what they shared.

**Something to sit with:** One gentle, open question for them to reflect on later.

Keep the full response under 160 words. Be kind, specific, and real."""

        messages = [
            {"role": "system", "content": "You are a warm journaling companion. Be human, brief, and specific."},
            {"role": "user", "content": prompt},
        ]

        output = generator(
            messages,
            max_new_tokens=220,
            do_sample=True,
            temperature=0.7,
            pad_token_id=generator.tokenizer.eos_token_id,
        )

        generated = output[0]["generated_text"]
        if isinstance(generated, list):
            # Chat-style result: a list of message dicts; the last entry is
            # taken as the assistant's reply.
            reply = generated[-1]["content"]
        else:
            # Plain-string result: drop a prefix the length of the prompt.
            # NOTE(review): this assumes the string begins with `prompt`
            # verbatim -- confirm for the installed transformers version.
            reply = generated[len(prompt):]
    finally:
        # Drop the pipeline and clear torch's CUDA cache even when generation
        # raises, so a failed request does not leave the weights allocated.
        del generator
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    return reply.strip()
|
|
|
|
def process_entry(audio_path):
    """Turn a recording into a ``(transcript, reflection)`` pair.

    The audio is transcribed first and the transcript is then passed to
    ``reflect``. When there is no recording, or the transcription comes back
    empty, the transcript slot is ``""`` and the second slot carries a hint
    for the user instead of a reflection.
    """
    if audio_path is not None:
        spoken_text = transcribe(audio_path)
        if spoken_text:
            return spoken_text, reflect(spoken_text)
        return (
            "",
            "Couldn't make out any speech. Try speaking a bit louder or closer to the mic.",
        )
    return (
        "",
        "Please record something first, then click Reflect.",
    )
|
|
|
|
|
|
# Markdown rendered at the top of the page (passed to gr.Markdown in the layout).
# NOTE(review): the text promises that everything runs locally, while
# `transcribe` and `reflect` are decorated with `spaces.GPU` -- confirm the
# claim still holds for the environment this app is deployed to.
DESCRIPTION = """
## Voice Journal
*Speak your day. Get it reflected back.*

Record a voice note about anything — how your day went, what's on your mind,
something that happened. A small AI model will listen and gently reflect it back to you.

Everything runs locally. Nothing leaves your machine.
"""
|
|
# Markdown credits rendered at the bottom of the page.
# The model names must match what the handlers actually load:
# whisper.load_model("tiny") and "Qwen/Qwen2.5-0.5B-Instruct".
FOOTER = """
---
*Built for the [Build Small Hackathon](https://huggingface.co/build-small-hackathon) ·
Whisper tiny (39M) + Qwen2.5-0.5B · No cloud APIs · Runs on your laptop*
"""
|
|
# Extra CSS passed to gr.Blocks(css=...). The "#..." selectors correspond to
# the elem_id values given to components in the layout.
#
# Both output textareas force a light background, so the dark text colour is
# set in the shared rule. Otherwise the transcript box would take its text
# colour from the active theme and could end up light-on-light.
custom_css = """
body { font-family: 'Georgia', serif; }

#title-block { text-align: center; padding: 1.5rem 0 0.5rem; }

#record-col { display: flex; flex-direction: column; gap: 0.75rem; }

#reflect-btn {
    background: #2d4a3e !important;
    color: #f0ebe0 !important;
    border: none !important;
    border-radius: 8px !important;
    font-size: 1rem !important;
    padding: 0.75rem !important;
    cursor: pointer;
}

#reflect-btn:hover { background: #1f3429 !important; }

#transcript-box textarea, #reflection-box textarea {
    font-family: 'Georgia', serif !important;
    font-size: 0.95rem !important;
    line-height: 1.65 !important;
    background: #faf8f3 !important;
    color: #2a2a2a !important;
    border: 1px solid #d8d0c0 !important;
    border-radius: 8px !important;
}

#reflection-box textarea {
    background: #f0ede4 !important;
}

footer { display: none !important; }
"""
|
|
# Build the page: a description header, a two-column row (recorder and button
# on the left, read-only outputs on the right), the click wiring, and a footer.
# Components are created in the order they should appear.
with gr.Blocks(
    title="Voice Journal",
    theme=gr.themes.Soft(
        primary_hue="emerald",
        neutral_hue="stone",
        font=gr.themes.GoogleFont("Lora"),
    ),
    css=custom_css,
) as app:

    # Header; elem_id matches the "#title-block" rule in custom_css.
    with gr.Column(elem_id="title-block"):
        gr.Markdown(DESCRIPTION)

    with gr.Row(equal_height=False):

        # Left column: microphone recorder and the trigger button.
        with gr.Column(scale=1, elem_id="record-col"):
            audio_input = gr.Audio(
                sources=["microphone"],
                # The recording reaches process_entry as its `audio_path` argument.
                type="filepath",
                label="Record your entry",
                show_download_button=False,
            )
            reflect_btn = gr.Button("Reflect →", elem_id="reflect-btn", variant="primary")

        # Right column: non-editable boxes for the transcript and the reflection.
        with gr.Column(scale=1):
            transcript_box = gr.Textbox(
                label="What you said",
                lines=5,
                interactive=False,
                placeholder="Your words will appear here after you click Reflect…",
                elem_id="transcript-box",
            )
            reflection_box = gr.Textbox(
                label="Reflection",
                lines=8,
                interactive=False,
                placeholder="Your reflection will appear here…",
                elem_id="reflection-box",
            )

    # The two values returned by process_entry fill the two output boxes, in order.
    # NOTE(review): api_name=False presumably keeps this handler out of the
    # generated API -- confirm against the installed Gradio version.
    reflect_btn.click(
        fn=process_entry,
        inputs=audio_input,
        outputs=[transcript_box, reflection_box],
        api_name=False,
    )

    gr.Markdown(FOOTER)
|
|
# Start the Gradio server only when this file is run as a script, not when it
# is imported; share=False is passed explicitly to launch().
if __name__ == "__main__":
    app.launch(share=False)
|
|