"""Voice Journal: record a voice note, transcribe it, and get a short reflection.

The app wires a Gradio UI to two model calls: Whisper ("tiny") for
speech-to-text and Qwen2.5-0.5B-Instruct for the written reflection. Each
model is loaded inside its own call and released again before returning.
"""

import warnings

import gradio as gr
import whisper
import torch
from transformers import pipeline
import spaces

warnings.filterwarnings("ignore")


def _release_gpu_memory() -> None:
    """Release PyTorch's unused cached CUDA memory when CUDA is available."""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


def _build_reflection_prompt(transcript: str) -> str:
    """Return the user-turn prompt that embeds *transcript* for the reflection model."""
    return f"""You are a warm, thoughtful journaling companion. Your tone is human and gentle, never clinical or robotic.

The user just shared this voice journal entry:
"{transcript}"

Respond with exactly three parts, clearly separated:

**Mood check:** A 2-sentence summary of the emotional tone you picked up.

**What I noticed:** One pattern, theme, or detail that stood out in what they shared.

**Something to sit with:** One gentle, open question for them to reflect on later.

Keep the full response under 160 words. Be kind, specific, and real."""


# NOTE(review): spaces.GPU presumably requests a GPU for the duration of the
# call when running on Hugging Face Spaces — confirm against the deployment.
@spaces.GPU
def transcribe(audio_path: str) -> str:
    """Transcribe an audio file to text with the Whisper "tiny" model.

    Args:
        audio_path: Path to the recorded audio file, or ``None`` when nothing
            was recorded.

    Returns:
        The stripped transcript text, or ``""`` when ``audio_path`` is ``None``.
    """
    if audio_path is None:
        return ""

    print("Loading Whisper inside GPU...")
    model = whisper.load_model("tiny")
    try:
        result = model.transcribe(audio_path)
    finally:
        # Drop the model even when transcription fails so its memory is not
        # held until the next call.
        del model
        _release_gpu_memory()

    return result["text"].strip()


@spaces.GPU
def reflect(transcript: str) -> str:
    """Generate a short, three-part reflection on a journal transcript.

    Args:
        transcript: The transcribed journal entry.

    Returns:
        The stripped model reply, or the placeholder ``"No transcript..."``
        when ``transcript`` is empty.
    """
    if not transcript:
        return "No transcript..."

    # Build the cheap, pure inputs first so the guarded section below only
    # covers work that actually holds the model.
    prompt = _build_reflection_prompt(transcript)
    messages = [
        {
            "role": "system",
            "content": "You are a warm journaling companion. Be human, brief, and specific.",
        },
        {"role": "user", "content": prompt},
    ]
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    print("Loading Qwen inside GPU...")
    generator = pipeline(
        "text-generation",
        model="Qwen/Qwen2.5-0.5B-Instruct",
        device_map="auto",
        torch_dtype=dtype,
    )
    try:
        output = generator(
            messages,
            max_new_tokens=220,
            do_sample=True,
            temperature=0.7,
            pad_token_id=generator.tokenizer.eos_token_id,
        )
    finally:
        # Release the pipeline whether or not generation succeeded.
        del generator
        _release_gpu_memory()

    # Extract the assistant's reply from the chat template output
    generated = output[0]["generated_text"]
    if isinstance(generated, list):
        # Chat format: last message is the assistant reply
        reply = generated[-1]["content"]
    else:
        # Fallback: strip the prompt
        # NOTE(review): assumes a string result begins with the raw prompt
        # text — confirm if this branch is ever taken.
        reply = generated[len(prompt):]

    return reply.strip()


def process_entry(audio_path):
    """Pipeline: audio → transcript → reflection.

    Args:
        audio_path: Path to the recorded audio file, or ``None``.

    Returns:
        A ``(transcript, reflection)`` tuple. When there is no recording or no
        speech was transcribed, the transcript is ``""`` and the second item
        is a user-facing hint instead of a reflection.
    """
    if audio_path is None:
        return (
            "",
            "Please record something first, then click Reflect.",
        )

    transcript = transcribe(audio_path)
    if not transcript:
        return (
            "",
            "Couldn't make out any speech. Try speaking a bit louder or closer to the mic.",
        )

    reflection = reflect(transcript)
    return transcript, reflection


# NOTE(review): the "runs locally" wording presumably only holds when the app
# is launched on the user's own machine — confirm for hosted deployments.
DESCRIPTION = """
## Voice Journal
*Speak your day. Get it reflected back.*

Record a voice note about anything — how your day went, what's on your mind, something that happened. A small AI model will listen and gently reflect it back to you.

Everything runs locally. Nothing leaves your machine.
"""

# Model names here mirror the checkpoints loaded in transcribe() and reflect().
FOOTER = """
---

*Built for the [Build Small Hackathon](https://huggingface.co/build-small-hackathon) · Whisper tiny (39M) + Qwen2.5-0.5B · No cloud APIs · Runs on your laptop*
"""

custom_css = """
body {
    font-family: 'Georgia', serif;
}

#title-block {
    text-align: center;
    padding: 1.5rem 0 0.5rem;
}

#record-col {
    display: flex;
    flex-direction: column;
    gap: 0.75rem;
}

#reflect-btn {
    background: #2d4a3e !important;
    color: #f0ebe0 !important;
    border: none !important;
    border-radius: 8px !important;
    font-size: 1rem !important;
    padding: 0.75rem !important;
    cursor: pointer;
}

#reflect-btn:hover {
    background: #1f3429 !important;
}

#transcript-box textarea,
#reflection-box textarea {
    font-family: 'Georgia', serif !important;
    font-size: 0.95rem !important;
    line-height: 1.65 !important;
    background: #faf8f3 !important;
    border: 1px solid #d8d0c0 !important;
    border-radius: 8px !important;
}

#reflection-box textarea {
    background: #f0ede4 !important;
    color: #2a2a2a !important;
}

footer {
    display: none !important;
}
"""

with gr.Blocks(
    title="Voice Journal",
    theme=gr.themes.Soft(
        primary_hue="emerald",
        neutral_hue="stone",
        font=gr.themes.GoogleFont("Lora"),
    ),
    css=custom_css,
) as app:
    with gr.Column(elem_id="title-block"):
        gr.Markdown(DESCRIPTION)

    with gr.Row(equal_height=False):
        # Left column — input
        with gr.Column(scale=1, elem_id="record-col"):
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="Record your entry",
                show_download_button=False,
            )
            reflect_btn = gr.Button("Reflect →", elem_id="reflect-btn", variant="primary")

        # Right column — output
        with gr.Column(scale=1):
            transcript_box = gr.Textbox(
                label="What you said",
                lines=5,
                interactive=False,
                placeholder="Your words will appear here after you click Reflect…",
                elem_id="transcript-box",
            )
            reflection_box = gr.Textbox(
                label="Reflection",
                lines=8,
                interactive=False,
                placeholder="Your reflection will appear here…",
                elem_id="reflection-box",
            )

    # One click runs the whole audio → transcript → reflection pipeline and
    # fills both output boxes.
    reflect_btn.click(
        fn=process_entry,
        inputs=audio_input,
        outputs=[transcript_box, reflection_box],
        api_name=False,
    )

    gr.Markdown(FOOTER)


if __name__ == "__main__":
    app.launch(share=False)