# hal/main.py
# fix: pronounce HAL as a word in TTS; add Peter profile facts (commit 6ecdf6a)
import io
import json
import os
import re
import uuid
import wave
from pathlib import Path
from urllib.parse import quote
from dotenv import load_dotenv
from fastapi import FastAPI, File, Request, Response, UploadFile
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from anthropic import Anthropic
from groq import Groq
from piper import PiperVoice
from hal_prompt import HAL_SYSTEM_PROMPT
# --- Runtime configuration and one-time startup side effects ---
load_dotenv()
MODEL_PATH = "models/hal.onnx"  # Piper ONNX voice model
CLAUDE_MODEL = "claude-sonnet-4-6"  # Anthropic chat model
WHISPER_MODEL = "whisper-large-v3-turbo"  # Groq-hosted transcription model
# Tail length of the message list sent to the LLM (counts individual
# messages, not user/assistant pairs — see the slice in /api/talk).
MAX_HISTORY_TURNS = 20
PROFILE_PATH = Path("profile.md")
DATA_DIR = Path(os.environ.get("HAL_DATA_DIR", "data"))
SESSIONS_DIR = DATA_DIR / "sessions"
SESSIONS_DIR.mkdir(parents=True, exist_ok=True)
# Loading the Piper voice is slow; do it once at import time.
print("Loading HAL voice...")
VOICE = PiperVoice.load(MODEL_PATH)
print("HAL voice loaded")
# If a profile file exists, append it to the system prompt as extra context.
if PROFILE_PATH.exists():
    profile_text = PROFILE_PATH.read_text().strip()
    SYSTEM_PROMPT = f"{HAL_SYSTEM_PROMPT}\n\n---\n\nContext about Peter:\n\n{profile_text}"
    print(f"Loaded profile ({len(profile_text)} chars)")
else:
    SYSTEM_PROMPT = HAL_SYSTEM_PROMPT
# Both clients read their API keys from the environment populated above.
groq_client = Groq()
anthropic_client = Anthropic()
app = FastAPI()
app.mount("/static", StaticFiles(directory="static"), name="static")
def session_file(session_id: str) -> Path:
    """Return the on-disk JSON path for the given session id."""
    return SESSIONS_DIR.joinpath(session_id + ".json")
def load_history(session_id: str) -> list[dict]:
    """Load a session's saved message list; empty list if missing or corrupt."""
    path = session_file(session_id)
    if not path.exists():
        return []
    try:
        return json.loads(path.read_text())
    except (OSError, json.JSONDecodeError):
        # Treat an unreadable or corrupt session file as a fresh conversation.
        return []
def save_history(session_id: str, history: list[dict]) -> None:
    """Persist the message list atomically (temp file + rename)."""
    target = session_file(session_id)
    tmp_path = target.with_suffix(".json.tmp")
    tmp_path.write_text(json.dumps(history))
    # Atomic replace so readers never observe a half-written file.
    tmp_path.replace(target)
def transcribe(audio_bytes: bytes, filename: str = "audio.webm") -> str:
    """Transcribe recorded audio to English text via Groq Whisper."""
    transcription = groq_client.audio.transcriptions.create(
        model=WHISPER_MODEL,
        file=(filename, audio_bytes),
        language="en",
    )
    return transcription.text.strip()
def hal_respond(history: list[dict]) -> str:
    """Get HAL's next reply from the Anthropic Messages API."""
    reply = anthropic_client.messages.create(
        model=CLAUDE_MODEL,
        system=SYSTEM_PROMPT,
        messages=history,
        max_tokens=300,
    )
    # Single text content block is expected given a text-only conversation.
    return reply.content[0].text.strip()
def synthesize_hal(text: str) -> bytes:
    """Render *text* with the Piper voice and return complete WAV bytes."""
    # Piper spells all-caps tokens letter-by-letter ("H-A-L"); rewrite the
    # standalone name so it is spoken as a single word.
    spoken_text = re.sub(r"\bHAL\b", "Hal", text)
    wav_buffer = io.BytesIO()
    with wave.open(wav_buffer, "wb") as out_wav:
        VOICE.synthesize_wav(spoken_text, out_wav)
    return wav_buffer.getvalue()
@app.get("/")
def index():
    """Serve the single-page client UI."""
    return FileResponse(path="static/index.html")
@app.post("/api/talk")
async def talk(request: Request, audio: UploadFile = File(...)):
    """Handle one voice turn: transcribe the upload, ask HAL, return WAV.

    Returns 204 when nothing intelligible was transcribed. Transcripts
    travel back URL-encoded in the X-User-Transcript / X-Hal-Transcript
    response headers; the session id is kept in an httponly cookie.
    """
    session_id = request.cookies.get("hal_session")
    # The cookie value becomes part of a filename in session_file(). Only
    # accept UUIDs we issued; a forged value like "../../x" would otherwise
    # allow path traversal. Anything malformed starts a fresh session.
    if session_id is not None:
        try:
            uuid.UUID(session_id)
        except ValueError:
            session_id = None
    new_session = session_id is None
    if new_session:
        session_id = str(uuid.uuid4())
    history = load_history(session_id)
    audio_bytes = await audio.read()
    filename = audio.filename or "audio.webm"
    user_text = transcribe(audio_bytes, filename)
    if not user_text:
        # Empty transcription: nothing to answer.
        resp = Response(status_code=204)
        if new_session:
            resp.set_cookie("hal_session", session_id, httponly=True, samesite="lax")
        return resp
    history.append({"role": "user", "content": user_text})
    trimmed = history[-MAX_HISTORY_TURNS:]
    # The Anthropic Messages API requires the first message to have the
    # "user" role. A plain tail slice of an alternating history (odd length
    # after the append above, even slice size) starts on an assistant turn,
    # so drop any leading assistant messages before sending.
    while trimmed and trimmed[0]["role"] != "user":
        trimmed = trimmed[1:]
    hal_text = hal_respond(trimmed)
    history.append({"role": "assistant", "content": hal_text})
    save_history(session_id, history)
    wav_bytes = synthesize_hal(hal_text)
    resp = Response(content=wav_bytes, media_type="audio/wav")
    # HTTP header values must be latin-1 safe; percent-encode the text.
    resp.headers["X-User-Transcript"] = quote(user_text)
    resp.headers["X-Hal-Transcript"] = quote(hal_text)
    if new_session:
        resp.set_cookie("hal_session", session_id, httponly=True, samesite="lax")
    return resp