piclez committed on
Commit
1b5f7e2
·
1 Parent(s): fffd4a7

feat: add HAL conversation loop (Piper TTS, Groq Whisper, Claude)

Browse files
Files changed (1) hide show
  1. main.py +100 -0
main.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import os
3
+ import uuid
4
+ import wave
5
+ from urllib.parse import quote
6
+
7
+ from dotenv import load_dotenv
8
+ from fastapi import FastAPI, File, Request, Response, UploadFile
9
+ from fastapi.responses import FileResponse
10
+ from fastapi.staticfiles import StaticFiles
11
+
12
+ from anthropic import Anthropic
13
+ from groq import Groq
14
+ from piper import PiperVoice
15
+
16
+ from hal_prompt import HAL_SYSTEM_PROMPT
17
+
18
# Load variables from a local .env file into the process environment.
# This runs before the API clients below are constructed; presumably they
# pick up their API keys from the environment (they are created with no
# arguments) -- TODO confirm.
load_dotenv()

# Path handed to PiperVoice.load() for the HAL voice model.
MODEL_PATH = "models/hal.onnx"
# Model name passed to the Anthropic messages call.
CLAUDE_MODEL = "claude-sonnet-4-6"
# Model name passed to the Groq transcription call.
WHISPER_MODEL = "whisper-large-v3-turbo"
# Size of the history tail sent to the chat model. It is applied as a slice
# over the per-session message list, so it counts individual messages
# (user and assistant entries each count as one).
MAX_HISTORY_TURNS = 20

# The voice is loaded once at import time and shared by every request.
print("Loading HAL voice...")
VOICE = PiperVoice.load(MODEL_PATH)
print("HAL voice loaded")

# Shared API clients, created once at import time.
groq_client = Groq()
anthropic_client = Anthropic()

# In-memory conversation store: session id (the "hal_session" cookie value)
# -> list of {"role": ..., "content": ...} messages. Not persisted.
SESSIONS: dict[str, list[dict]] = {}

app = FastAPI()
# Serve the contents of the local "static" directory under /static.
app.mount("/static", StaticFiles(directory="static"), name="static")
36
+
37
+
38
def transcribe(audio_bytes: bytes, filename: str = "audio.webm") -> str:
    """Transcribe recorded speech to text via the Groq transcription API.

    Args:
        audio_bytes: Raw bytes of the uploaded recording.
        filename: Name sent along with the bytes in the upload tuple.

    Returns:
        The transcription text with surrounding whitespace removed.
    """
    upload = (filename, audio_bytes)
    transcription = groq_client.audio.transcriptions.create(
        model=WHISPER_MODEL,
        language="en",
        file=upload,
    )
    text = transcription.text
    return text.strip()
45
+
46
+
47
def hal_respond(history: list[dict]) -> str:
    """Ask the chat model for HAL's next line.

    Args:
        history: Conversation messages, each a dict with "role" and
            "content" keys, passed through unchanged as ``messages``.

    Returns:
        The text of the first content block of the reply, stripped of
        surrounding whitespace.
    """
    request_args = {
        "model": CLAUDE_MODEL,
        "max_tokens": 300,
        "system": HAL_SYSTEM_PROMPT,
        "messages": history,
    }
    reply = anthropic_client.messages.create(**request_args)
    first_block = reply.content[0]
    return first_block.text.strip()
55
+
56
+
57
def synthesize_hal(text: str) -> bytes:
    """Render ``text`` with the HAL voice and return the WAV file bytes.

    The audio is written to an in-memory buffer through a ``wave`` writer
    that is handed to ``VOICE.synthesize_wav``; nothing touches disk.
    """
    wav_buffer = io.BytesIO()
    writer = wave.open(wav_buffer, "wb")
    with writer:
        VOICE.synthesize_wav(text, writer)
    # The writer is closed on leaving the ``with`` block; read the buffer
    # only afterwards, as the original does.
    return wav_buffer.getvalue()
62
+
63
+
64
@app.get("/")
def index():
    """Serve the front-end page at the site root."""
    page = FileResponse("static/index.html")
    return page
67
+
68
+
69
def _recent_window(history: list[dict], limit: int) -> list[dict]:
    """Return the newest ``limit`` messages, starting on a user message.

    A plain tail slice of an alternating user/assistant list can begin with
    an assistant reply once the list is longer than ``limit``. The chat API
    expects the conversation to open with a user message, so any leading
    non-user entries are dropped from the slice.
    """
    window = history[-limit:]
    for start, message in enumerate(window):
        if message["role"] == "user":
            return window[start:]
    return []


def _with_session_cookie(resp: Response, session_id: str, new_session: bool) -> Response:
    """Set the ``hal_session`` cookie on ``resp`` when the session is new."""
    if new_session:
        resp.set_cookie("hal_session", session_id, httponly=True, samesite="lax")
    return resp


@app.post("/api/talk")
async def talk(request: Request, audio: UploadFile = File(...)):
    """Handle one spoken turn: transcribe, get HAL's reply, return it as audio.

    The conversation is keyed by the ``hal_session`` cookie; a new id is
    generated (and the cookie set on the response) when the cookie is absent.

    Returns:
        * ``204 No Content`` when the transcription is empty.
        * Otherwise an ``audio/wav`` response with the synthesized reply. The
          URL-quoted user and HAL transcripts are carried in the
          ``X-User-Transcript`` and ``X-Hal-Transcript`` headers.

    Raises:
        Whatever ``transcribe``, ``hal_respond`` or ``synthesize_hal`` raise;
        errors are not translated here.
    """
    # NOTE(review): transcribe, hal_respond and synthesize_hal are plain
    # synchronous functions called directly inside this coroutine; consider
    # moving them off the event loop if concurrent requests matter.
    session_id = request.cookies.get("hal_session")
    new_session = session_id is None
    if new_session:
        session_id = str(uuid.uuid4())
    history = SESSIONS.setdefault(session_id, [])

    audio_bytes = await audio.read()
    user_text = transcribe(audio_bytes, audio.filename or "audio.webm")

    if not user_text:
        # Nothing intelligible was said: no history entry, no reply audio.
        return _with_session_cookie(Response(status_code=204), session_id, new_session)

    history.append({"role": "user", "content": user_text})
    try:
        hal_text = hal_respond(_recent_window(history, MAX_HISTORY_TURNS))
    except Exception:
        # Keep the stored conversation alternating: without a reply, the
        # user message just added must not linger in the history.
        history.pop()
        raise
    history.append({"role": "assistant", "content": hal_text})

    # Only the recent window is ever sent to the model, so older messages
    # are dropped in place to keep per-session memory bounded.
    history[:] = _recent_window(history, MAX_HISTORY_TURNS)

    wav_bytes = synthesize_hal(hal_text)

    resp = Response(content=wav_bytes, media_type="audio/wav")
    # Transcripts are URL-quoted before being placed in the headers.
    resp.headers["X-User-Transcript"] = quote(user_text)
    resp.headers["X-Hal-Transcript"] = quote(hal_text)
    return _with_session_cookie(resp, session_id, new_session)