# hal/main.py
# fix: pronounce HAL as a word in TTS; add Peter profile facts (commit 6ecdf6a)
import io
import json
import os
import re
import uuid
import wave
from pathlib import Path
from urllib.parse import quote
from dotenv import load_dotenv
from fastapi import FastAPI, File, Request, Response, UploadFile
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from anthropic import Anthropic
from groq import Groq
from piper import PiperVoice
from hal_prompt import HAL_SYSTEM_PROMPT
# --- Runtime configuration and one-time startup side effects ---
load_dotenv()
MODEL_PATH = "models/hal.onnx"  # Piper ONNX voice model
CLAUDE_MODEL = "claude-sonnet-4-6"  # Anthropic chat model
WHISPER_MODEL = "whisper-large-v3-turbo"  # Groq-hosted transcription model
# Tail length of the message list sent to the LLM (counts individual
# messages, not user/assistant pairs — see the slice in /api/talk).
MAX_HISTORY_TURNS = 20
PROFILE_PATH = Path("profile.md")
DATA_DIR = Path(os.environ.get("HAL_DATA_DIR", "data"))
SESSIONS_DIR = DATA_DIR / "sessions"
SESSIONS_DIR.mkdir(parents=True, exist_ok=True)
# Loading the Piper voice is slow; do it once at import time.
print("Loading HAL voice...")
VOICE = PiperVoice.load(MODEL_PATH)
print("HAL voice loaded")
# If a profile file exists, append it to the system prompt as extra context.
if PROFILE_PATH.exists():
    profile_text = PROFILE_PATH.read_text().strip()
    SYSTEM_PROMPT = f"{HAL_SYSTEM_PROMPT}\n\n---\n\nContext about Peter:\n\n{profile_text}"
    print(f"Loaded profile ({len(profile_text)} chars)")
else:
    SYSTEM_PROMPT = HAL_SYSTEM_PROMPT
# Both clients read their API keys from the environment populated above.
groq_client = Groq()
anthropic_client = Anthropic()
app = FastAPI()
app.mount("/static", StaticFiles(directory="static"), name="static")
def session_file(session_id: str) -> Path:
    """Return the on-disk JSON path for the given session id."""
    return SESSIONS_DIR.joinpath(session_id + ".json")
def load_history(session_id: str) -> list[dict]:
    """Load a session's saved message list; empty list if missing or corrupt."""
    path = session_file(session_id)
    if not path.exists():
        return []
    try:
        return json.loads(path.read_text())
    except (OSError, json.JSONDecodeError):
        # Treat an unreadable or corrupt session file as a fresh conversation.
        return []
def save_history(session_id: str, history: list[dict]) -> None:
    """Persist the message list atomically (temp file + rename)."""
    target = session_file(session_id)
    tmp_path = target.with_suffix(".json.tmp")
    tmp_path.write_text(json.dumps(history))
    # Atomic replace so readers never observe a half-written file.
    tmp_path.replace(target)
def transcribe(audio_bytes: bytes, filename: str = "audio.webm") -> str:
    """Transcribe recorded audio to English text via Groq Whisper."""
    transcription = groq_client.audio.transcriptions.create(
        model=WHISPER_MODEL,
        file=(filename, audio_bytes),
        language="en",
    )
    return transcription.text.strip()
def hal_respond(history: list[dict]) -> str:
    """Get HAL's next reply from the Anthropic Messages API."""
    reply = anthropic_client.messages.create(
        model=CLAUDE_MODEL,
        system=SYSTEM_PROMPT,
        messages=history,
        max_tokens=300,
    )
    # Single text content block is expected given a text-only conversation.
    return reply.content[0].text.strip()
def synthesize_hal(text: str) -> bytes:
    """Render *text* with the Piper voice and return complete WAV bytes."""
    # Piper spells all-caps tokens letter-by-letter ("H-A-L"); rewrite the
    # standalone name so it is spoken as a single word.
    spoken_text = re.sub(r"\bHAL\b", "Hal", text)
    wav_buffer = io.BytesIO()
    with wave.open(wav_buffer, "wb") as out_wav:
        VOICE.synthesize_wav(spoken_text, out_wav)
    return wav_buffer.getvalue()
@app.get("/")
def index():
    """Serve the single-page client UI."""
    return FileResponse(path="static/index.html")
@app.post("/api/talk")
async def talk(request: Request, audio: UploadFile = File(...)):
    """Handle one voice turn: transcribe the upload, ask HAL, return WAV.

    Returns 204 when nothing intelligible was transcribed. Transcripts
    travel back URL-encoded in the X-User-Transcript / X-Hal-Transcript
    response headers; the session id is kept in an httponly cookie.
    """
    session_id = request.cookies.get("hal_session")
    # The cookie value becomes part of a filename in session_file(). Only
    # accept UUIDs we issued; a forged value like "../../x" would otherwise
    # allow path traversal. Anything malformed starts a fresh session.
    if session_id is not None:
        try:
            uuid.UUID(session_id)
        except ValueError:
            session_id = None
    new_session = session_id is None
    if new_session:
        session_id = str(uuid.uuid4())
    history = load_history(session_id)
    audio_bytes = await audio.read()
    filename = audio.filename or "audio.webm"
    user_text = transcribe(audio_bytes, filename)
    if not user_text:
        # Empty transcription: nothing to answer.
        resp = Response(status_code=204)
        if new_session:
            resp.set_cookie("hal_session", session_id, httponly=True, samesite="lax")
        return resp
    history.append({"role": "user", "content": user_text})
    trimmed = history[-MAX_HISTORY_TURNS:]
    # The Anthropic Messages API requires the first message to have the
    # "user" role. A plain tail slice of an alternating history (odd length
    # after the append above, even slice size) starts on an assistant turn,
    # so drop any leading assistant messages before sending.
    while trimmed and trimmed[0]["role"] != "user":
        trimmed = trimmed[1:]
    hal_text = hal_respond(trimmed)
    history.append({"role": "assistant", "content": hal_text})
    save_history(session_id, history)
    wav_bytes = synthesize_hal(hal_text)
    resp = Response(content=wav_bytes, media_type="audio/wav")
    # HTTP header values must be latin-1 safe; percent-encode the text.
    resp.headers["X-User-Transcript"] = quote(user_text)
    resp.headers["X-Hal-Transcript"] = quote(hal_text)
    if new_session:
        resp.set_cookie("hal_session", session_id, httponly=True, samesite="lax")
    return resp