ArijitMishra's picture
Update app.py
325c7d4 verified
Raw
History Blame Contribute Delete
5.67 kB
import gradio as gr
import whisper
import torch
from transformers import pipeline
import warnings
import spaces
warnings.filterwarnings("ignore")
@spaces.GPU
def transcribe(audio_path: str) -> str:
    """Transcribe a recorded audio file to text with Whisper.

    The "tiny" Whisper model is loaded on every call and released again
    before the function returns.

    Args:
        audio_path: Filesystem path of the recording. ``None`` or an empty
            string is treated as "nothing was recorded".

    Returns:
        The recognised text with surrounding whitespace removed, or ``""``
        when no audio path was supplied.
    """
    if not audio_path:
        return ""

    print("Loading Whisper inside GPU...")
    model = whisper.load_model("tiny")
    try:
        result = model.transcribe(audio_path)
    finally:
        # Drop the model even when transcription raises, so a bad recording
        # does not leave its memory cached for the next request.
        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    return result["text"].strip()
def _extract_reply(generated, prompt: str) -> str:
    """Return the assistant's text from a pipeline ``generated_text`` value."""
    if isinstance(generated, list):
        # Chat format: the last message is the assistant reply.
        return generated[-1]["content"]
    # Plain-string format: strip the echoed prompt only when it is really
    # there; slicing blindly would cut into the reply otherwise.
    if generated.startswith(prompt):
        return generated[len(prompt):]
    return generated


@spaces.GPU
def reflect(transcript: str) -> str:
    """Generate a short, gentle reflection on a journal transcript.

    Builds a three-part prompt (mood check, observation, open question),
    runs it through the Qwen2.5-0.5B-Instruct text-generation pipeline and
    returns the assistant's reply. The pipeline is created on every call
    and released again before the function returns.

    Args:
        transcript: The transcribed journal entry.

    Returns:
        The model's reply with surrounding whitespace removed, or the
        placeholder ``"No transcript..."`` when the transcript is empty or
        contains only whitespace.
    """
    if not transcript or not transcript.strip():
        return "No transcript..."

    prompt = f"""You are a warm, thoughtful journaling companion. Your tone is human and gentle, never clinical or robotic.
The user just shared this voice journal entry:
"{transcript}"
Respond with exactly three parts, clearly separated:
**Mood check:** A 2-sentence summary of the emotional tone you picked up.
**What I noticed:** One pattern, theme, or detail that stood out in what they shared.
**Something to sit with:** One gentle, open question for them to reflect on later.
Keep the full response under 160 words. Be kind, specific, and real."""
    messages = [
        {"role": "system", "content": "You are a warm journaling companion. Be human, brief, and specific."},
        {"role": "user", "content": prompt},
    ]

    print("Loading Qwen inside GPU...")
    generator = pipeline(
        "text-generation",
        model="Qwen/Qwen2.5-0.5B-Instruct",
        device_map="auto",
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    )
    try:
        output = generator(
            messages,
            max_new_tokens=220,
            do_sample=True,
            temperature=0.7,
            pad_token_id=generator.tokenizer.eos_token_id,
        )
        # Extract the assistant's reply from the chat template output.
        reply = _extract_reply(output[0]["generated_text"], prompt)
    finally:
        # Release the pipeline even when generation fails.
        del generator
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    return reply.strip()
def process_entry(audio_path):
    """Pipeline: audio → transcript → reflection.

    Args:
        audio_path: Path of the recorded audio file, or ``None``/``""`` when
            nothing has been recorded yet.

    Returns:
        A ``(transcript, reflection)`` tuple of strings. When there is no
        recording, or no speech could be recognised, the transcript is ``""``
        and the second element carries a hint for the user instead.
    """
    # Treat any falsy path (None or "") as "nothing recorded" so an empty
    # value is never handed to the transcription step.
    if not audio_path:
        return (
            "",
            "Please record something first, then click Reflect.",
        )

    transcript = transcribe(audio_path)
    if not transcript:
        return (
            "",
            "Couldn't make out any speech. Try speaking a bit louder or closer to the mic.",
        )

    return transcript, reflect(transcript)
# Markdown intro shown in the "title-block" column at the top of the page.
# NOTE(review): the "Everything runs locally. Nothing leaves your machine."
# claim only holds when the app is started on the user's own computer; the
# `spaces.GPU` decorators in this file suggest a hosted deployment — confirm
# and adjust the wording if recordings are processed remotely.
DESCRIPTION = """
## Voice Journal
*Speak your day. Get it reflected back.*
Record a voice note about anything — how your day went, what's on your mind,
something that happened. A small AI model will listen and gently reflect it back to you.
Everything runs locally. Nothing leaves your machine.
"""
# Markdown footer rendered at the bottom of the page. The model names and
# sizes listed here should match what the app actually loads: Whisper "tiny"
# and Qwen/Qwen2.5-0.5B-Instruct.
FOOTER = """
---
*Built for the [Build Small Hackathon](https://huggingface.co/build-small-hackathon) ·
Whisper tiny (39M) + Qwen2.5-0.5B · No cloud APIs · Runs on your laptop*
"""
# Stylesheet passed to gr.Blocks(css=...). The `#...` selectors target the
# `elem_id` values assigned to components in the layout (title-block,
# record-col, reflect-btn, transcript-box, reflection-box); the final rule
# hides the page's `footer` element.
custom_css = """
body { font-family: 'Georgia', serif; }
#title-block { text-align: center; padding: 1.5rem 0 0.5rem; }
#record-col { display: flex; flex-direction: column; gap: 0.75rem; }
#reflect-btn {
background: #2d4a3e !important;
color: #f0ebe0 !important;
border: none !important;
border-radius: 8px !important;
font-size: 1rem !important;
padding: 0.75rem !important;
cursor: pointer;
}
#reflect-btn:hover { background: #1f3429 !important; }
#transcript-box textarea, #reflection-box textarea {
font-family: 'Georgia', serif !important;
font-size: 0.95rem !important;
line-height: 1.65 !important;
background: #faf8f3 !important;
border: 1px solid #d8d0c0 !important;
border-radius: 8px !important;
}
#reflection-box textarea {
background: #f0ede4 !important;
color: #2a2a2a !important;
}
footer { display: none !important; }
"""
# Build the single-page UI: intro text on top, a two-column row (recorder and
# button on the left, transcript and reflection on the right), then the footer.
with gr.Blocks(
    title="Voice Journal",
    theme=gr.themes.Soft(
        primary_hue="emerald",
        neutral_hue="stone",
        font=gr.themes.GoogleFont("Lora"),
    ),
    css=custom_css,
) as app:
    with gr.Column(elem_id="title-block"):
        gr.Markdown(DESCRIPTION)
    with gr.Row(equal_height=False):
        # Left column — input
        with gr.Column(scale=1, elem_id="record-col"):
            # type="filepath" hands process_entry a path string rather than
            # raw audio data.
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="Record your entry",
                show_download_button=False,
            )
            reflect_btn = gr.Button("Reflect →", elem_id="reflect-btn", variant="primary")
        # Right column — output
        with gr.Column(scale=1):
            transcript_box = gr.Textbox(
                label="What you said",
                lines=5,
                interactive=False,
                placeholder="Your words will appear here after you click Reflect…",
                elem_id="transcript-box",
            )
            reflection_box = gr.Textbox(
                label="Reflection",
                lines=8,
                interactive=False,
                placeholder="Your reflection will appear here…",
                elem_id="reflection-box",
            )
    # Clicking the button runs the whole audio → transcript → reflection
    # pipeline; the returned 2-tuple fills the two output boxes in order.
    # NOTE(review): api_name=False presumably keeps this event out of the
    # generated API — confirm against the installed Gradio version.
    reflect_btn.click(
        fn=process_entry,
        inputs=audio_input,
        outputs=[transcript_box, reflection_box],
        api_name=False,
    )
    gr.Markdown(FOOTER)
# Start the server only when the file is executed as a script.
if __name__ == "__main__":
    app.launch(share=False)