# Source: Hugging Face repo "OLLAMA", file treinamento.py
# Uploaded by akra35567 — "Update treinamento.py", commit 4dd4c49 (verified)
# treinamento.py — V25 — FINE-TUNE AUTOMÁTICO (NA RAIZ)
import json
import os
import threading
import time
import requests
from loguru import logger
from database import Database
from sentence_transformers import SentenceTransformer
import config
# === CONFIGURATION ===
# Base model pulled from the Ollama registry; the fine-tuned variant is built FROM it.
MODEL_BASE = "qwen2.5:1.5b-instruct-q4_0"
# Name registered with Ollama for the fine-tuned model (becomes config.OLLAMA_MODEL on success).
MODEL_FINE = "akira-luanda-v25"
# JSONL file persisting one {"user": ..., "assistant": ...} interaction per line.
DATASET_PATH = "/app/dataset.jsonl"
# NOTE(review): unusual casing ("MODelfile_PATH") — kept as-is because other
# blocks in this module reference it by this exact name.
MODelfile_PATH = "/app/Modelfile"
# Shared sentence-transformers encoder used by gerar_embedding(); loaded once at import time.
EMBEDDING_MODEL = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
# Lock guarding the shared in-memory dataset, plus the dataset list itself.
_lock = threading.Lock()
_dataset = []
def gerar_embedding(text: str):
    """Encode *text* with the shared sentence-transformers model.

    Returns the embedding as a plain Python list (JSON/DB friendly).
    """
    vector = EMBEDDING_MODEL.encode(text, convert_to_numpy=True)
    return vector.tolist()
def salvar_dataset():
    """Rewrite the JSONL dataset file from the in-memory dataset.

    Takes a snapshot of ``_dataset`` under ``_lock`` first: the original
    iterated the shared list directly, racing with the concurrent append
    performed by ``Treinamento.registrar_interacao`` on another thread.
    """
    with _lock:
        snapshot = list(_dataset)
    with open(DATASET_PATH, "w", encoding="utf-8") as f:
        for entry in snapshot:
            f.write(json.dumps(entry, ensure_ascii=False) + "\n")
def criar_modelfile():
    """Build the Ollama Modelfile text for the fine-tuned model.

    Emits ``MESSAGE user ...`` / ``MESSAGE assistant ...`` directives —
    the syntax the Ollama Modelfile format actually defines — instead of
    the original bare ``USER:`` / ``ASSISTANT:`` lines, which are not
    valid Modelfile keywords and would be rejected by ``/api/create``.

    Returns the complete Modelfile contents as a single string.
    """
    header = [
        f"FROM {MODEL_BASE}",
        # Triple-quoted SYSTEM blocks are supported Modelfile syntax.
        f'SYSTEM """{config.PERSONA}"""',
        "PARAMETER temperature 0.9",
        "PARAMETER num_ctx 4096",
    ]
    # Snapshot under the lock so a concurrent append cannot mutate the
    # list while we iterate it.
    with _lock:
        data = list(_dataset)
    for d in data:
        header.append(f"MESSAGE user {d['user']}")
        header.append(f"MESSAGE assistant {d['assistant']}")
    return "\n".join(header) + "\n"
class Treinamento:
    """Background fine-tune manager.

    Records every user/assistant interaction (DB row, embedding, JSONL
    dataset entry) and, once ``min_interactions`` entries exist, asks the
    local Ollama server to create the fine-tuned model ``MODEL_FINE``.
    A daemon loop also re-checks every ``interval_hours`` hours.
    """

    def __init__(self, db: Database, min_interactions: int = 25, interval_hours: int = 4):
        self.db = db
        self.min_interactions = min_interactions
        self.interval = interval_hours * 3600  # seconds between periodic checks
        self.thread = None
        # Non-blocking guard so a run triggered by registrar_interacao()
        # and one triggered by _loop() cannot fine-tune concurrently.
        self._train_lock = threading.Lock()
        self.carregar_dataset()
        self.iniciar_loop()

    def carregar_dataset(self):
        """Load the persisted JSONL dataset (if present) into ``_dataset``."""
        global _dataset
        if os.path.exists(DATASET_PATH):
            try:
                with open(DATASET_PATH, "r", encoding="utf-8") as f:
                    entries = [json.loads(line) for line in f if line.strip()]
                # Swap the module-level list under the shared lock.
                with _lock:
                    _dataset = entries
                logger.info(f"{len(entries)} kandandos carregados do dataset!")
            except Exception as e:
                logger.error(f"Erro ao carregar dataset: {e}")
                _dataset = []

    def iniciar_loop(self):
        """Start the periodic fine-tune checker thread (idempotent)."""
        if not self.thread or not self.thread.is_alive():
            self.thread = threading.Thread(target=self._loop, daemon=True)
            self.thread.start()
            logger.info("Loop de fine-tune iniciado!")

    def registrar_interacao(self, usuario, mensagem, resposta, numero, is_reply=False, mensagem_original=""):
        """Persist one interaction: DB message, embedding, dataset entry.

        ``is_reply`` / ``mensagem_original`` are accepted for caller
        compatibility but are not used here.
        """
        try:
            # === SAVE TO DB ===
            self.db.salvar_mensagem(usuario, mensagem, resposta, numero)
            # === EMBEDDING ===
            texto = f"{mensagem} {resposta}".lower()
            embedding = gerar_embedding(texto)
            self.db.salvar_embedding(numero, mensagem, resposta, embedding, texto=texto)
            # === DATASET ===
            entry = {"user": mensagem.strip(), "assistant": resposta.strip()}
            # Hold the lock around BOTH the list append and the file append
            # so the in-memory list and the JSONL file cannot diverge under
            # concurrent callers (the original locked only the append).
            with _lock:
                _dataset.append(entry)
                total = len(_dataset)
                with open(DATASET_PATH, "a", encoding="utf-8") as f:
                    json.dump(entry, f, ensure_ascii=False)
                    f.write("\n")
            logger.info(f"Kandando salvo: {total} total")
            # === TRAIN ONCE THE THRESHOLD IS REACHED ===
            if total >= self.min_interactions:
                threading.Thread(target=self._treinar, daemon=True).start()
        except Exception as e:
            logger.error(f"Erro ao registrar: {e}")

    def _treinar(self):
        """Write the Modelfile and ask Ollama to create the fine-tuned model."""
        # Skip instead of queueing if a fine-tune is already in flight.
        if not self._train_lock.acquire(blocking=False):
            return
        try:
            if len(_dataset) < self.min_interactions:
                return
            logger.info(f"INICIANDO FINE-TUNE → {MODEL_FINE} com {len(_dataset)} kandandos")
            try:
                salvar_dataset()
                modelfile = criar_modelfile()
                with open(MODelfile_PATH, "w", encoding="utf-8") as f:
                    f.write(modelfile)
                # Ollama's /api/create takes a JSON body with the model name
                # and the Modelfile contents — the original multipart
                # `files=` upload is not part of the API and also leaked the
                # opened file handle.
                payload = {"name": MODEL_FINE, "modelfile": modelfile}
                resp = requests.post("http://localhost:11434/api/create", json=payload, timeout=600)
                if resp.status_code == 200:
                    config.OLLAMA_MODEL = MODEL_FINE
                    logger.success(f"MODELO {MODEL_FINE} CRIADO COM SUCESSO!")
                else:
                    logger.error(f"Erro Ollama: {resp.status_code} {resp.text}")
            except Exception as e:
                logger.error(f"Erro no fine-tune: {e}")
            finally:
                # Always clean up the temp Modelfile; the original removed it
                # only on the non-200 path, leaking it on success.
                if os.path.exists(MODelfile_PATH):
                    os.remove(MODelfile_PATH)
        finally:
            self._train_lock.release()

    def _loop(self):
        """Periodic re-check: fine-tune whenever enough interactions exist."""
        while True:
            time.sleep(self.interval)
            if len(_dataset) >= self.min_interactions:
                self._treinar()