Spaces:

build-small-hackathon
/

VoiceJournalReflection

Sleeping

App Files Files Community

VoiceJournalReflection / app.py

ArijitMishra

Update app.py

325c7d4 verified 12 days ago

Raw

History Blame Contribute Delete

5.67 kB

	import gradio as gr
	import whisper
	import torch
	from transformers import pipeline
	import warnings
	import spaces
	warnings.filterwarnings("ignore")



	@spaces.GPU
	def transcribe(audio_path: str) -> str:
	    """Transcribe a recorded audio file to text with Whisper's "tiny" model.

	    The model is loaded on every call and deleted again before returning.
	    NOTE(review): ``spaces.GPU`` presumably requests a GPU for the duration
	    of the call on Hugging Face Spaces; confirm against the ``spaces`` docs.

	    Args:
	        audio_path: Filesystem path of the recording, or ``None`` when
	            nothing was recorded.

	    Returns:
	        The transcript with surrounding whitespace removed, or ``""`` when
	        ``audio_path`` is ``None``.
	    """
	    if audio_path is None:
	        return ""

	    print("Loading Whisper inside GPU...")
	    model = whisper.load_model("tiny")
	    try:
	        result = model.transcribe(audio_path)
	    finally:
	        # Drop the model even when transcription raises, so a failed
	        # request still releases its cached CUDA memory.
	        del model
	        if torch.cuda.is_available():
	            torch.cuda.empty_cache()
	    return result["text"].strip()


	@spaces.GPU
	def reflect(transcript: str) -> str:
	    """Generate a short, three-part reflection on a journal transcript.

	    A ``Qwen/Qwen2.5-0.5B-Instruct`` text-generation pipeline is created on
	    every call and deleted again before returning. The model is asked for a
	    "Mood check", a "What I noticed" and a "Something to sit with" section.

	    Args:
	        transcript: The transcribed journal entry.

	    Returns:
	        The model's reply with surrounding whitespace removed, or the
	        placeholder ``"No transcript..."`` when ``transcript`` is empty or
	        contains only whitespace.
	    """
	    # A blank transcript gives the model nothing to reflect on.
	    if not transcript or not transcript.strip():
	        return "No transcript..."

	    prompt = f"""You are a warm, thoughtful journaling companion. Your tone is human and gentle, never clinical or robotic.

	The user just shared this voice journal entry:

	"{transcript}"

	Respond with exactly three parts, clearly separated:

	Mood check: A 2-sentence summary of the emotional tone you picked up.

	What I noticed: One pattern, theme, or detail that stood out in what they shared.

	Something to sit with: One gentle, open question for them to reflect on later.

	Keep the full response under 160 words. Be kind, specific, and real."""

	    messages = [
	        {"role": "system", "content": "You are a warm journaling companion. Be human, brief, and specific."},
	        {"role": "user", "content": prompt},
	    ]

	    print("Loading Qwen inside GPU...")
	    generator = pipeline(
	        "text-generation",
	        model="Qwen/Qwen2.5-0.5B-Instruct",
	        device_map="auto",
	        # Half precision only when CUDA is available; full precision otherwise.
	        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
	    )
	    try:
	        output = generator(
	            messages,
	            max_new_tokens=220,
	            do_sample=True,
	            temperature=0.7,
	            pad_token_id=generator.tokenizer.eos_token_id,
	        )

	        # Extract the assistant's reply from the chat template output.
	        generated = output[0]["generated_text"]
	        if isinstance(generated, list):
	            # Chat format: the last message is the assistant reply.
	            reply = generated[-1]["content"]
	        else:
	            # Fallback for a plain-string result: strip the prompt.
	            # NOTE(review): this slices by the user prompt's length only; a
	            # string result would presumably also contain the system message
	            # and template text - confirm whether this branch is reachable.
	            reply = generated[len(prompt):]
	    finally:
	        # Drop the pipeline even when generation raises, so a failed
	        # request still releases its cached CUDA memory.
	        del generator
	        if torch.cuda.is_available():
	            torch.cuda.empty_cache()
	    return reply.strip()


	def process_entry(audio_path):
	    """Run the full flow for one recording: audio -> transcript -> reflection.

	    Returns a ``(transcript, reflection)`` pair. When there is no recording,
	    or no speech could be transcribed, the transcript is ``""`` and the
	    second element carries a hint for the user instead of a reflection.
	    """
	    # Nothing recorded yet: skip both models entirely.
	    if audio_path is None:
	        return "", "Please record something first, then click Reflect."

	    spoken_text = transcribe(audio_path)
	    if spoken_text:
	        return spoken_text, reflect(spoken_text)

	    # Transcription came back empty.
	    return (
	        "",
	        "Couldn't make out any speech. Try speaking a bit louder or closer to the mic.",
	    )



	# Intro markdown rendered in the "title-block" column at the top of the page.
	# NOTE(review): the text promises that everything runs locally, while
	# transcribe() and reflect() are decorated with @spaces.GPU - confirm the
	# wording is accurate for every way this app is deployed.
	DESCRIPTION = """
	## Voice Journal
	Speak your day. Get it reflected back.

	Record a voice note about anything — how your day went, what's on your mind,
	something that happened. A small AI model will listen and gently reflect it back to you.

	Everything runs locally. Nothing leaves your machine.
	"""

	# Footer markdown rendered as the last element of the page. The model names
	# must match what transcribe() and reflect() actually load: Whisper "tiny"
	# and Qwen/Qwen2.5-0.5B-Instruct.
	# NOTE(review): "Runs on your laptop" is kept as written - confirm it is
	# accurate for hosted deployments.
	FOOTER = """
	---
	*Built for the [Build Small Hackathon](https://huggingface.co/build-small-hackathon) ·
	Whisper tiny (39M) + Qwen2.5-0.5B · No cloud APIs · Runs on your laptop*
	"""

	# Stylesheet handed to gr.Blocks(css=...). The #id selectors correspond to
	# the elem_id values set on the components in the layout below; the final
	# rule hides any <footer> element on the page.
	custom_css = """
	body { font-family: 'Georgia', serif; }

	#title-block { text-align: center; padding: 1.5rem 0 0.5rem; }

	#record-col { display: flex; flex-direction: column; gap: 0.75rem; }

	#reflect-btn {
	background: #2d4a3e !important;
	color: #f0ebe0 !important;
	border: none !important;
	border-radius: 8px !important;
	font-size: 1rem !important;
	padding: 0.75rem !important;
	cursor: pointer;
	}

	#reflect-btn:hover { background: #1f3429 !important; }

	#transcript-box textarea, #reflection-box textarea {
	font-family: 'Georgia', serif !important;
	font-size: 0.95rem !important;
	line-height: 1.65 !important;
	background: #faf8f3 !important;
	border: 1px solid #d8d0c0 !important;
	border-radius: 8px !important;
	}

	#reflection-box textarea {
	background: #f0ede4 !important;
	color: #2a2a2a !important;
	}

	footer { display: none !important; }
	"""

	# Page layout: intro text on top, then a two-column row (recorder and button
	# on the left, transcript and reflection on the right), then the footer.
	with gr.Blocks(
	    css=custom_css,
	    theme=gr.themes.Soft(
	        font=gr.themes.GoogleFont("Lora"),
	        neutral_hue="stone",
	        primary_hue="emerald",
	    ),
	    title="Voice Journal",
	) as app:

	    with gr.Column(elem_id="title-block"):
	        gr.Markdown(DESCRIPTION)

	    with gr.Row(equal_height=False):
	        # Input side: microphone recorder plus the trigger button.
	        with gr.Column(scale=1, elem_id="record-col"):
	            audio_input = gr.Audio(
	                label="Record your entry",
	                sources=["microphone"],
	                type="filepath",
	                show_download_button=False,
	            )
	            reflect_btn = gr.Button("Reflect →", variant="primary", elem_id="reflect-btn")

	        # Output side: read-only transcript and reflection boxes.
	        with gr.Column(scale=1):
	            transcript_box = gr.Textbox(
	                elem_id="transcript-box",
	                label="What you said",
	                placeholder="Your words will appear here after you click Reflect…",
	                lines=5,
	                interactive=False,
	            )
	            reflection_box = gr.Textbox(
	                elem_id="reflection-box",
	                label="Reflection",
	                placeholder="Your reflection will appear here…",
	                lines=8,
	                interactive=False,
	            )

	    # One click runs the whole pipeline and fills both output boxes.
	    reflect_btn.click(
	        fn=process_entry,
	        inputs=audio_input,
	        outputs=[transcript_box, reflection_box],
	        api_name=False,
	    )

	    gr.Markdown(FOOTER)

	# Start the server only when this file is executed as a script.
	if __name__ == "__main__":
	    app.launch(share=False)