import json
import logging
from typing import AsyncGenerator

from src.core.ports.llm_port import LlmPort
from src.core.config import settings

logger = logging.getLogger(__name__)


class TogetherAdapter(LlmPort):
    """
    Together AI adapter via OpenAI-compatible API.

    Free $25 credit on signup, then pay-as-you-go (very cheap).

    Available models (set TOGETHER_MODEL env var):
        meta-llama/Llama-3.3-70B-Instruct-Turbo   <- default, best quality
        mistralai/Mistral-Small-24B-Instruct-2501
        mistralai/Mixtral-8x7B-Instruct-v0.1
        mistralai/Mistral-7B-Instruct-v0.1

    Docs: https://docs.together.ai
    """

    BASE_URL = "https://api.together.xyz/v1"
    DEFAULT_MODEL = "meta-llama/Llama-3.3-70B-Instruct-Turbo"

    def __init__(self):
        """Lazily build a ChatOpenAI client pointed at Together's endpoint.

        If the API key is missing (or still the placeholder), ``self.llm``
        stays ``None`` and the generate methods degrade to a friendly
        "not configured" message instead of raising.
        """
        self.llm = None
        key = settings.TOGETHER_API_KEY
        if key and key != "your-together-api-key-here":
            try:
                # Imported here so the app still starts when langchain_openai
                # is absent and this adapter is disabled.
                from langchain_openai import ChatOpenAI

                model = settings.TOGETHER_MODEL or self.DEFAULT_MODEL
                self.llm = ChatOpenAI(
                    api_key=key,
                    base_url=self.BASE_URL,
                    model=model,
                    temperature=0.2,
                    max_tokens=1024,
                )
                logger.info(f"✅ Together AI adapter initialized with model: {model}")
            except Exception as e:
                logger.error(f"Failed to initialize Together adapter: {e}")
        else:
            logger.warning("TOGETHER_API_KEY not set — Together AI adapter disabled.")

    def generate(self, prompt: str) -> str:
        """Return a single completion for *prompt*, or a human-readable
        error string (never raises — callers render the return value as-is).
        """
        if not self.llm:
            return "Together AI API key not configured."
        try:
            return self.llm.invoke(prompt).content
        except Exception as e:
            error_msg = str(e)
            # HTTP 429 / rate-limit errors get a friendlier message.
            if "429" in error_msg or "rate" in error_msg.lower():
                return "Together AI rate limit reached. Please try again shortly."
            # logger.exception keeps the traceback, unlike logger.error(f"...").
            logger.exception("Together generate error: %s", e)
            return f"Error generating response: {error_msg}"

    async def generate_stream(self, prompt: str) -> AsyncGenerator[str, None]:
        """Stream tokens for *prompt* as Server-Sent Events.

        Yields ``data: {"token": ...}`` frames followed by a terminal
        ``data: [DONE]`` frame, including for the error paths.
        """
        if not self.llm:
            yield f"data: {json.dumps({'token': 'Together AI API key not configured.'})}\n\n"
            yield "data: [DONE]\n\n"
            return
        try:
            # BUGFIX: the original iterated the *synchronous* self.llm.stream()
            # inside an async generator, blocking the event loop for the whole
            # generation. Use the async streaming API instead.
            async for chunk in self.llm.astream(prompt):
                if hasattr(chunk, "content") and chunk.content:
                    yield f"data: {json.dumps({'token': chunk.content})}\n\n"
            yield "data: [DONE]\n\n"
        except Exception as e:
            error_msg = str(e)
            if "429" in error_msg or "rate" in error_msg.lower():
                msg = "Together AI rate limit reached. Please try again shortly."
            else:
                msg = f"Error: {error_msg}"
            yield f"data: {json.dumps({'token': msg})}\n\n"
            yield "data: [DONE]\n\n"