# treinamento.py — V25 — AUTOMATIC FINE-TUNE (PROJECT ROOT)

import json
import os
import threading
import time

import requests
from loguru import logger
from sentence_transformers import SentenceTransformer

import config
from database import Database

# === CONFIGURATION ===
MODEL_BASE = "qwen2.5:1.5b-instruct-q4_0"
MODEL_FINE = "akira-luanda-v25"
DATASET_PATH = "/app/dataset.jsonl"
MODELFILE_PATH = "/app/Modelfile"
EMBEDDING_MODEL = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

# Lock protecting the in-memory dataset, which is shared across threads
_lock = threading.Lock()
_dataset = []


def gerar_embedding(text: str):
    """Encode text into a dense vector, returned as a plain list for storage."""
    return EMBEDDING_MODEL.encode(text, convert_to_numpy=True).tolist()


def salvar_dataset():
    """Rewrite the whole dataset to disk as JSONL."""
    with _lock:
        data = _dataset.copy()
    with open(DATASET_PATH, "w", encoding="utf-8") as f:
        for entry in data:
            f.write(json.dumps(entry, ensure_ascii=False) + "\n")


def criar_modelfile() -> str:
    """Build an Ollama Modelfile that bakes the persona and the collected
    user/assistant pairs into the model as default conversation history.

    Note: Modelfile syntax uses MESSAGE directives; plain "USER:"/"ASSISTANT:"
    lines are not valid instructions and would be rejected by Ollama.
    """
    modelfile = (
        f"FROM {MODEL_BASE}\n"
        f'SYSTEM """{config.PERSONA}"""\n'
        "PARAMETER temperature 0.9\n"
        "PARAMETER num_ctx 4096\n"
    )
    with _lock:
        data = _dataset.copy()
    for d in data:
        # Triple quotes keep multi-line messages inside a single directive.
        modelfile += f'\nMESSAGE user """{d["user"]}"""\nMESSAGE assistant """{d["assistant"]}"""\n'
    return modelfile


class Treinamento:
    def __init__(self, db: Database, min_interactions: int = 25, interval_hours: int = 4):
        self.db = db
        self.min_interactions = min_interactions
        self.interval = interval_hours * 3600
        self.thread = None
        self.carregar_dataset()
        self.iniciar_loop()

    def carregar_dataset(self):
        """Load previously collected pairs from the JSONL file, if it exists."""
        global _dataset
        if os.path.exists(DATASET_PATH):
            try:
                with open(DATASET_PATH, "r", encoding="utf-8") as f:
                    _dataset = [json.loads(line) for line in f if line.strip()]
                logger.info(f"{len(_dataset)} interactions loaded from the dataset!")
            except Exception as e:
                logger.error(f"Failed to load dataset: {e}")
                _dataset = []

    def iniciar_loop(self):
        """Start the background thread that periodically triggers a rebuild."""
        if not self.thread or not self.thread.is_alive():
            self.thread = threading.Thread(target=self._loop, daemon=True)
            self.thread.start()
            logger.info("Fine-tune loop started!")

    def registrar_interacao(self, usuario, mensagem, resposta, numero,
                            is_reply=False, mensagem_original=""):
        try:
            # === PERSIST TO THE DATABASE ===
            self.db.salvar_mensagem(usuario, mensagem, resposta, numero)

            # === EMBEDDING ===
            texto = f"{mensagem} {resposta}".lower()
            embedding = gerar_embedding(texto)
            self.db.salvar_embedding(numero, mensagem, resposta, embedding, texto=texto)

            # === DATASET (append in memory and on disk, under the lock) ===
            entry = {"user": mensagem.strip(), "assistant": resposta.strip()}
            with _lock:
                _dataset.append(entry)
                with open(DATASET_PATH, "a", encoding="utf-8") as f:
                    json.dump(entry, f, ensure_ascii=False)
                    f.write("\n")
            logger.info(f"Interaction saved: {len(_dataset)} total")

            # === REBUILD ONCE THE THRESHOLD (25 BY DEFAULT) IS REACHED ===
            if len(_dataset) >= self.min_interactions:
                threading.Thread(target=self._treinar, daemon=True).start()
        except Exception as e:
            logger.error(f"Failed to record interaction: {e}")

    def _treinar(self):
        if len(_dataset) < self.min_interactions:
            return
        logger.info(f"STARTING FINE-TUNE → {MODEL_FINE} with {len(_dataset)} interactions")
        try:
            salvar_dataset()
            modelfile = criar_modelfile()
            with open(MODELFILE_PATH, "w", encoding="utf-8") as f:
                f.write(modelfile)

            # /api/create takes a JSON body (older-style API: name plus the
            # Modelfile contents as a string), not a multipart file upload.
            resp = requests.post(
                "http://localhost:11434/api/create",
                json={"name": MODEL_FINE, "modelfile": modelfile, "stream": False},
                timeout=600,
            )
            if resp.status_code == 200:
                config.OLLAMA_MODEL = MODEL_FINE  # point the bot at the new model
                logger.success(f"MODEL {MODEL_FINE} CREATED SUCCESSFULLY!")
            else:
                logger.error(f"Ollama error: {resp.status_code} {resp.text}")
        except Exception as e:
            logger.error(f"Fine-tune error: {e}")
        finally:
            if os.path.exists(MODELFILE_PATH):
                os.remove(MODELFILE_PATH)

    def _loop(self):
        while True:
            time.sleep(self.interval)
            if len(_dataset) >= self.min_interactions:
                self._treinar()
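

# --- Usage sketch (illustrative only, not part of the module) ---
# A minimal example of wiring Treinamento into a bot. It assumes Database()
# can be constructed without arguments; the real constructor and the
# salvar_* method signatures live in database.py and may differ.
if __name__ == "__main__":
    db = Database()  # assumption: adjust to database.py's actual API
    treino = Treinamento(db, min_interactions=25, interval_hours=4)

    # Record one interaction; once min_interactions are collected, a rebuild
    # of akira-luanda-v25 is triggered in a background daemon thread.
    treino.registrar_interacao(
        usuario="Joana",
        mensagem="Oi, tudo bem?",
        resposta="Tudo ótimo! E contigo?",
        numero="+244900000000",
    )
    time.sleep(5)  # give the daemon threads a moment before the demo exits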