"""FarmLingua AI HTTP service.

Wires the project's speech-to-text pipeline, text router, LLM engine and
conversation memory into four endpoints: speech chat, text chat, session
reset and a health probe.
"""

import base64
import itertools
import os
import uuid
from typing import Iterator, Optional

from transformers import safetensors_conversion

# Replace the auto-conversion hook with a no-op and silence advisory warnings.
# NOTE(review): kept ahead of every other third-party/project import, as in
# the original, so the patch is in place before any model code is imported.
safetensors_conversion.auto_conversion = lambda *args, **kwargs: None
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"

from fastapi import FastAPI, File, Header, HTTPException, UploadFile  # noqa: E402
from fastapi.concurrency import run_in_threadpool  # noqa: E402
from fastapi.responses import StreamingResponse  # noqa: E402
from huggingface_hub import get_token, login  # noqa: E402
from pydantic import BaseModel  # noqa: E402

# Authenticate: prefer an explicit HF_TOKEN, fall back to a cached CLI login.
hf_token = os.environ.get("HF_TOKEN") or get_token()
if hf_token:
    login(token=hf_token)
else:
    raise RuntimeError(
        "No HuggingFace token found. "
        "Set HF_TOKEN environment variable or run `huggingface-cli login`."
    )

# NOTE(review): these project imports intentionally follow the login above —
# presumably they load models at import time; confirm before reordering.
from memory import get_history, append_turn, clear_session  # noqa: E402
from stt_module.pipeline import stt_pipeline  # noqa: E402
from text_module.router import prepare_user_message  # noqa: E402
from llm.engine import llm_engine  # noqa: E402

app = FastAPI(title="FarmLingua AI", version="1.0.0")

# Sentinel returned by ``next`` when the token stream is already exhausted.
_STREAM_END = object()


# Helpers

def resolve_uid(x_uid: Optional[str]) -> str:
    """Return the caller-supplied UID, or a fresh UUID4 string if none given."""
    return x_uid if x_uid else str(uuid.uuid4())


def encode_header(value: str) -> str:
    """Base64-encode header values that may contain non-latin-1 characters."""
    return base64.b64encode(value.encode("utf-8")).decode("ascii")


def stream_llm(uid: str, channel: str, user_message: str) -> Iterator[str]:
    """Yield the LLM reply token by token and record the exchange in memory.

    This is a generator: nothing below runs until the first ``next()`` call.
    The user turn is stored before generation starts; the assistant turn
    (the joined, stripped tokens) is stored only once the stream has been
    fully consumed.
    """
    history = get_history(uid, channel)
    append_turn(uid, channel, "user", user_message)

    streamer = llm_engine.stream(history, user_message)

    full_response = []
    for token in streamer:
        full_response.append(token)
        yield token

    assistant_reply = "".join(full_response).strip()
    append_turn(uid, channel, "assistant", assistant_reply)


async def _streaming_chat_response(
    uid: str, channel: str, user_message: str, headers: dict
) -> StreamingResponse:
    """Build the streaming reply, surfacing start-up failures as HTTP 503.

    ``stream_llm`` is lazy, so merely constructing ``StreamingResponse``
    around it can never raise; an engine ``RuntimeError`` would only appear
    after the 200 status line had already been sent. Pulling the first token
    here (in the threadpool, so the event loop is not blocked) lets such a
    failure be reported with a proper status code. The trade-off is that the
    response headers go out when the first token is ready rather than
    immediately.

    Raises:
        HTTPException: 503 if a ``RuntimeError`` occurs before the first token.
    """
    token_iter = stream_llm(uid, channel, user_message)
    try:
        # The default argument avoids StopIteration crossing the thread boundary.
        first_token = await run_in_threadpool(next, token_iter, _STREAM_END)
    except RuntimeError as exc:
        raise HTTPException(status_code=503, detail=str(exc)) from exc

    if first_token is _STREAM_END:
        body = iter(())  # the engine produced no tokens: empty 200 body
    else:
        body = itertools.chain((first_token,), token_iter)

    return StreamingResponse(body, media_type="text/plain", headers=headers)


# Routes

@app.post("/stt/transcribe-and-chat")
async def stt_transcribe_and_chat(
    audio: UploadFile = File(...),
    x_uid: Optional[str] = Header(default=None),
):
    """Transcribe an uploaded audio file and stream the LLM's reply to it.

    The transcription (base64), language and confidence are returned as
    response headers alongside the resolved UID. A ``ValueError`` from the STT
    pipeline becomes HTTP 422; a ``RuntimeError`` before the first LLM token
    becomes HTTP 503.
    """
    uid = resolve_uid(x_uid)
    audio_bytes = await audio.read()

    # NOTE(review): transcribe() is called synchronously inside an async
    # route; if it is slow it will stall the event loop — consider offloading
    # once the pipeline's thread-safety is confirmed.
    try:
        stt_result = stt_pipeline.transcribe(audio_bytes)
    except ValueError as e:
        raise HTTPException(status_code=422, detail=str(e)) from e

    transcription = stt_result["transcription"]

    headers = {
        "X-UID": uid,
        "X-Transcription": encode_header(transcription),  # base64 — safe for latin-1
        "X-Language": stt_result["language"],
        "X-Confidence": str(stt_result["confidence"]),
        "Access-Control-Expose-Headers": "X-UID, X-Transcription, X-Language, X-Confidence",
    }

    return await _streaming_chat_response(uid, "stt", transcription, headers)


class TextRequest(BaseModel):
    """Request body for ``/text/chat``."""

    message: str


@app.post("/text/chat")
async def text_chat(
    body: TextRequest,
    x_uid: Optional[str] = Header(default=None),
):
    """Stream the LLM's reply to a text message.

    A ``ValueError`` from ``prepare_user_message`` becomes HTTP 400; a
    ``RuntimeError`` before the first LLM token becomes HTTP 503.
    """
    uid = resolve_uid(x_uid)

    try:
        user_message = prepare_user_message(body.message)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

    headers = {
        "X-UID": uid,
        "Access-Control-Expose-Headers": "X-UID",
    }

    return await _streaming_chat_response(uid, "text", user_message, headers)


@app.delete("/session")
async def clear_user_session(x_uid: str = Header(...)):
    """Clear the stored session for the UID given in the ``X-UID`` header."""
    clear_session(x_uid)
    return {"status": "cleared", "uid": x_uid}


@app.get("/health")
async def health():
    """Liveness probe."""
    return {"status": "ok"}