# Repository page header captured with this listing (not part of the program):
#   drrobot9's picture
#   stt and ttt together commit
#   3c95cb7 verified
#   Raw
#   History Blame Contribute Delete
#   3.66 kB
import os
from transformers import safetensors_conversion
# Replace transformers' safetensors auto-conversion hook with a no-op that
# accepts any arguments and returns None.
safetensors_conversion.auto_conversion = lambda *args, **kwargs: None
# NOTE(review): this variable is set after `transformers` has already been
# imported above — confirm the library reads it lazily rather than at import.
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
import uuid
import base64
from huggingface_hub import login, get_token
from fastapi import FastAPI, UploadFile, File, Header, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from typing import Optional
# Authenticate
# Prefer the HF_TOKEN environment variable; otherwise fall back to whatever
# huggingface_hub's get_token() returns.
hf_token = os.environ.get("HF_TOKEN") or get_token()
if not hf_token:
    # Fail at import time: without a token the module refuses to load.
    raise RuntimeError(
        "No HuggingFace token found. "
        "Set HF_TOKEN environment variable or run `huggingface-cli login`."
    )
login(token=hf_token)
from memory import get_history, append_turn, clear_session
from stt_module.pipeline import stt_pipeline
from text_module.router import prepare_user_message
from llm.engine import llm_engine
# FastAPI application instance; the route decorators further down register
# their handlers on this object.
app = FastAPI(title="FarmLingua AI", version="1.0.0")
# Helpers
def resolve_uid(x_uid: Optional[str]) -> str:
    """Return the caller-supplied session id, or mint a fresh one.

    Args:
        x_uid: Value of the ``X-UID`` request header, if any.

    Returns:
        ``x_uid`` unchanged when it is a non-empty string; otherwise the
        string form of a newly generated UUID4.
    """
    # Both None (header absent) and "" (header empty) count as "not supplied".
    if not x_uid:
        return str(uuid.uuid4())
    return x_uid
def encode_header(value: str) -> str:
    """Return *value* as the base64 text of its UTF-8 bytes.

    The result contains only ASCII characters, so it can be placed in an HTTP
    header even when *value* itself holds characters outside latin-1.
    """
    utf8_bytes = value.encode("utf-8")
    encoded = base64.b64encode(utf8_bytes)
    return str(encoded, "ascii")
def stream_llm(uid: str, channel: str, user_message: str):
    """Stream the LLM reply for one user turn and record the exchange.

    The user turn is stored first, then tokens from ``llm_engine.stream`` are
    yielded as they arrive. When the stream ends, the concatenated, stripped
    reply is stored as the assistant turn.

    Args:
        uid: Session identifier the history is keyed on.
        channel: History channel name (the routes pass "stt" or "text").
        user_message: The user's message for this turn.

    Yields:
        Each token produced by ``llm_engine.stream``, unchanged.

    Notes:
        The assistant turn is written in a ``finally`` block. If the consumer
        stops early (the generator is closed, e.g. on client disconnect) or
        the engine fails mid-stream, the text streamed so far is still
        recorded, so a stored user turn is not left without a reply. If
        nothing was streamed before the interruption, no assistant turn is
        written.
    """
    # History is fetched before the new user turn is appended, and the user
    # message is handed to the engine separately.
    # NOTE(review): this assumes get_history() returns a snapshot rather than
    # the live list that append_turn() mutates — confirm in `memory`.
    history = get_history(uid, channel)
    append_turn(uid, channel, "user", user_message)

    pieces = []
    completed = False
    try:
        for token in llm_engine.stream(history, user_message):
            pieces.append(token)
            yield token
        completed = True
    finally:
        assistant_reply = "".join(pieces).strip()
        # Normal completion always records the reply (even if empty, as
        # before); an interrupted stream records it only if text was produced.
        if completed or assistant_reply:
            append_turn(uid, channel, "assistant", assistant_reply)
# Routes
@app.post("/stt/transcribe-and-chat")
async def stt_transcribe_and_chat(
    audio: UploadFile = File(...),
    x_uid: Optional[str] = Header(default=None),
):
    """Transcribe an uploaded audio file and stream the LLM reply to it.

    Args:
        audio: Uploaded audio file; its full contents are read into memory.
        x_uid: Optional ``X-UID`` header identifying the session. A new id is
            generated when it is absent or empty.

    Returns:
        A ``text/plain`` streaming response carrying the LLM tokens. Response
        headers: ``X-UID``, ``X-Transcription`` (base64 of the UTF-8
        transcription), ``X-Language`` and ``X-Confidence``.

    Raises:
        HTTPException: 422 when the STT pipeline raises ``ValueError``; 503
            when the LLM stream raises ``RuntimeError`` before producing its
            first token.
    """
    # Function-scope import so this handler does not depend on edits to the
    # module's import section.
    from fastapi.concurrency import run_in_threadpool

    uid = resolve_uid(x_uid)
    audio_bytes = await audio.read()
    # NOTE(review): transcribe() is called synchronously inside an async
    # handler; if it is slow it blocks the event loop. Consider off-loading
    # it once the pipeline's thread-safety has been confirmed.
    try:
        stt_result = stt_pipeline.transcribe(audio_bytes)
    except ValueError as e:
        raise HTTPException(status_code=422, detail=str(e)) from e

    transcription = stt_result["transcription"]
    headers = {
        "X-UID": uid,
        "X-Transcription": encode_header(transcription),  # base64 — safe for latin-1
        "X-Language": stt_result["language"],
        "X-Confidence": str(stt_result["confidence"]),
        "Access-Control-Expose-Headers": "X-UID, X-Transcription, X-Language, X-Confidence",
    }

    # A generator's body does not run until it is iterated, so wrapping only
    # the StreamingResponse constructor in try/except can never observe an LLM
    # failure. Pull the first token now (in the threadpool, so the event loop
    # is not blocked) while an error status can still be returned.
    token_stream = stream_llm(uid, "stt", transcription)
    exhausted = object()
    try:
        first_token = await run_in_threadpool(next, token_stream, exhausted)
    except RuntimeError as e:
        raise HTTPException(status_code=503, detail=str(e)) from e

    def _replay():
        # Re-emit the token consumed above, then hand over to the live stream.
        if first_token is exhausted:
            return
        yield first_token
        yield from token_stream

    return StreamingResponse(
        _replay(),
        media_type="text/plain",
        headers=headers,
    )
class TextRequest(BaseModel):
    """Request body accepted by the ``/text/chat`` endpoint."""

    # Raw user text; the route passes it through prepare_user_message()
    # before it reaches the LLM.
    message: str
@app.post("/text/chat")
async def text_chat(
    body: TextRequest,
    x_uid: Optional[str] = Header(default=None),
):
    """Stream the LLM reply to a text message.

    Args:
        body: Request payload holding the user's ``message``.
        x_uid: Optional ``X-UID`` header identifying the session. A new id is
            generated when it is absent or empty.

    Returns:
        A ``text/plain`` streaming response carrying the LLM tokens, with the
        resolved session id in the ``X-UID`` response header.

    Raises:
        HTTPException: 400 when ``prepare_user_message`` raises
            ``ValueError``; 503 when the LLM stream raises ``RuntimeError``
            before producing its first token.
    """
    # Function-scope import so this handler does not depend on edits to the
    # module's import section.
    from fastapi.concurrency import run_in_threadpool

    uid = resolve_uid(x_uid)
    try:
        user_message = prepare_user_message(body.message)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

    headers = {
        "X-UID": uid,
        "Access-Control-Expose-Headers": "X-UID",
    }

    # A generator's body does not run until it is iterated, so wrapping only
    # the StreamingResponse constructor in try/except can never observe an LLM
    # failure. Pull the first token now (in the threadpool, so the event loop
    # is not blocked) while an error status can still be returned.
    token_stream = stream_llm(uid, "text", user_message)
    exhausted = object()
    try:
        first_token = await run_in_threadpool(next, token_stream, exhausted)
    except RuntimeError as e:
        raise HTTPException(status_code=503, detail=str(e)) from e

    def _replay():
        # Re-emit the token consumed above, then hand over to the live stream.
        if first_token is exhausted:
            return
        yield first_token
        yield from token_stream

    return StreamingResponse(
        _replay(),
        media_type="text/plain",
        headers=headers,
    )
@app.delete("/session")
async def clear_user_session(x_uid: str = Header(...)):
    """Clear the session named by the required ``X-UID`` header.

    Calls ``clear_session`` with the header value and echoes that value back
    in the JSON acknowledgement.
    """
    clear_session(x_uid)
    acknowledgement = dict(status="cleared", uid=x_uid)
    return acknowledgement
@app.get("/health")
async def health():
    """Liveness probe: always answers with a fixed ``ok`` status payload."""
    return dict(status="ok")