# Optional .env bootstrap. It runs before the rest of the module so that the
# os.getenv() calls evaluated at import time (e.g. VLLM_MODEL in
# _MODEL_CONFIG) can see values loaded from a .env file.
try:
    from dotenv import load_dotenv
except ImportError:
    # python-dotenv is optional; fall back to the process environment.
    pass
else:
    # Kept outside the ``try`` so that only a missing package is tolerated;
    # an error raised while loading the file is not silently swallowed.
    load_dotenv(override=True)
|
|
|
|
| import os |
| from dataclasses import dataclass |
| from enum import StrEnum |
|
|
| from langchain_core.language_models import BaseChatModel |
| from langchain_core.messages import AIMessage, HumanMessage, SystemMessage |
| from langchain_groq import ChatGroq |
| from langchain_openai import ChatOpenAI |
|
|
| from agent.src.tools.lore_writer.writer import get_data |
| class Role(StrEnum): |
| """ |
| Papel funcional do LLM. Cada papel mapeia pra um modelo + temperatura |
| no _MODEL_CONFIG. Tools pedem por papel, não por modelo. |
| """ |
| CREATIVE = "creative" |
| FAST = "fast" |
| LOCAL = "local" |
|
|
|
|
@dataclass(frozen=True)
class ModelSpec:
    """Immutable pairing of a model identifier and its sampling temperature."""

    # Model name handed to the chat client's ``model`` argument.
    model: str
    # Value handed to the chat client's ``temperature`` argument.
    temperature: float
|
|
|
|
# Role -> model/temperature table consulted by get_llm.
_MODEL_CONFIG: dict[Role, ModelSpec] = {
    Role.CREATIVE: ModelSpec(model="llama-3.3-70b-versatile", temperature=0.8),
    Role.FAST: ModelSpec(model="groq/compound", temperature=0.3),
    # The local model name is read from VLLM_MODEL once, at import time.
    Role.LOCAL: ModelSpec(
        model=os.getenv("VLLM_MODEL", "Qwen/Qwen2.5-7B-Instruct-AWQ"),
        temperature=0.3,
    ),
}
|
|
# Tools handed to the agent built in the ``__main__`` smoke test.
tools = [get_data]
|
|
|
|
def get_llm(role: Role = Role.FAST) -> BaseChatModel:
    """Build a chat-model client for ``role``.

    Args:
        role: Functional role whose model and temperature are looked up in
            ``_MODEL_CONFIG``. Defaults to ``Role.FAST``.

    Returns:
        A ``ChatOpenAI`` client for ``Role.LOCAL`` and a ``ChatGroq`` client
        for every other role. Both are created with ``max_retries=2``.

    Raises:
        KeyError: If ``role`` has no entry in ``_MODEL_CONFIG``.
    """
    spec = _MODEL_CONFIG[role]
    # Settings that both backends receive.
    shared = {
        "model": spec.model,
        "temperature": spec.temperature,
        "max_retries": 2,
    }

    if role is not Role.LOCAL:
        return ChatGroq(**shared)

    # Local endpoint and key come from the environment, with defaults.
    # NOTE(review): presumably a vLLM server exposing an OpenAI-compatible
    # API, going by the VLLM_* variable names - confirm.
    endpoint = os.getenv("VLLM_BASE_URL", "http://localhost:8000/v1")
    key = os.getenv("VLLM_API_KEY", "EMPTY")
    return ChatOpenAI(base_url=endpoint, api_key=key, **shared)
|
|
|
|
| |
| |
| |
| |
|
|
# Maps the "role" string of a chat-style message dict to the LangChain
# message class used to wrap its content (see infer_model).
_ROLE_TO_MESSAGE: dict[str, type] = {
    "system": SystemMessage,
    "user": HumanMessage,
    "assistant": AIMessage,
}
|
|
|
|
def infer_model(
    msg: str,
    messages: list[dict] | None = None,
    local: bool = False,
) -> tuple[str, list[dict]]:
    """Compatibility shim for the old ``infer_model`` (plain Groq SDK).

    Appends ``msg`` as a user turn, sends the whole conversation to the
    selected chat model and appends the reply as an assistant turn.

    Args:
        msg: Text of the new user turn.
        messages: Conversation so far, as dicts with ``"role"`` and
            ``"content"`` keys; each role must be a key of
            ``_ROLE_TO_MESSAGE``. When ``None``, a new conversation is
            started with the default system prompt. A list passed in is
            extended in place.
        local: When true, use ``Role.LOCAL``; otherwise ``Role.FAST``.

    Returns:
        ``(reply, messages)``: the content of the model's response and the
        conversation including the new user and assistant turns.

    Raises:
        KeyError: If a message's role is not in ``_ROLE_TO_MESSAGE``.
    """
    # Honor the ``local`` flag. It used to be ignored because the role was
    # hard-coded to LOCAL. FAST mirrors get_llm's own default role.
    role = Role.LOCAL if local else Role.FAST

    if messages is None:
        messages = [{
            "role": "system",
            "content": (
                "You are a helpful RPG session assistant for the Ekalia campaign. "
                "Answer questions about lore, rules, NPCs, and help the game master "
                "during live sessions. Be concise and useful at the table."
            ),
        }]

    messages.append({"role": "user", "content": msg})

    # Convert the dict-based history into LangChain message objects.
    lc_messages = [
        _ROLE_TO_MESSAGE[m["role"]](content=m["content"]) for m in messages
    ]
    response = get_llm(role).invoke(lc_messages)
    reply = response.content

    messages.append({"role": "assistant", "content": reply})
    return reply, messages
|
|
|
|
if __name__ == "__main__":
    # Interactive smoke test: chat with an agent from the terminal.
    # Pass --local to use Role.LOCAL; otherwise Role.CREATIVE is used.
    import sys

    # Imported here so that importing this module does not require it.
    from langchain.agents import create_agent

    role = Role.LOCAL if "--local" in sys.argv else Role.CREATIVE
    print(f"Smoke test com role={role.value}")
    llm = get_llm(role)
    spec = _MODEL_CONFIG[role]

    # Build the agent once. It depends only on the model and the tool list,
    # so there is no reason to rebuild it on every turn.
    agent = create_agent(
        model=llm,
        tools=tools,
    )

    messages = [
        SystemMessage(content="You are a helpful assistant for testing LLM responses."),
    ]
    while True:
        try:
            user_input = input("\nVocê: ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C ends the session without a traceback.
            break

        command = user_input.strip().lower()
        if command in {"sair", "exit", "quit"}:
            break
        if not command:
            # Nothing typed: do not send an empty turn to the model.
            continue

        messages.append(HumanMessage(content=user_input))

        response = agent.invoke({"messages": messages})
        reply = response["messages"][-1].content
        print(f"\n{spec.model}: {reply}")

        # Only the final message's content is kept in the running history.
        messages.append(AIMessage(content=reply))
|
|