| | |
| | import json |
| | import os |
| | import threading |
| | import time |
| | import requests |
| | from loguru import logger |
| | from database import Database |
| | from sentence_transformers import SentenceTransformer |
| | import config |
| |
|
| | |
# Base model to fine-tune from, and the name given to the resulting custom model.
MODEL_BASE = "qwen2.5:1.5b-instruct-q4_0"
MODEL_FINE = "akira-luanda-v25"
# JSONL file where user/assistant pairs are persisted between restarts.
DATASET_PATH = "/app/dataset.jsonl"
# NOTE(review): odd casing ("MODelfile") — kept as-is; other code in this file references it.
MODelfile_PATH = "/app/Modelfile"
# Multilingual sentence-embedding model, loaded once at import time
# (first import is slow: downloads/loads the transformer weights).
EMBEDDING_MODEL = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

# Protects the in-memory dataset, which is shared across threads.
_lock = threading.Lock()
# In-memory mirror of DATASET_PATH: list of {"user": ..., "assistant": ...} dicts.
_dataset = []
| |
|
def gerar_embedding(text: str):
    """Return the sentence embedding of *text* as a plain Python list of floats."""
    vector = EMBEDDING_MODEL.encode(text, convert_to_numpy=True)
    return vector.tolist()
| |
|
def salvar_dataset():
    """Rewrite DATASET_PATH with the full in-memory dataset as JSON Lines.

    Takes a snapshot of ``_dataset`` under ``_lock`` first: other threads
    (``registrar_interacao``) append to the list concurrently, and iterating
    it unlocked could interleave a partially updated view into the file.
    """
    with _lock:
        snapshot = list(_dataset)
    with open(DATASET_PATH, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(entry, ensure_ascii=False) + "\n" for entry in snapshot)
| |
|
def criar_modelfile():
    """Build an Ollama Modelfile string from the base model, persona and dataset.

    Conversation examples are emitted as ``MESSAGE user``/``MESSAGE assistant``
    directives — the raw ``USER:/ASSISTANT:`` text the previous version appended
    is not valid Modelfile syntax and would be rejected by Ollama.
    """
    lines = [
        f"FROM {MODEL_BASE}",
        f'SYSTEM """{config.PERSONA}"""',
        "PARAMETER temperature 0.9",
        "PARAMETER num_ctx 4096",
    ]
    # Snapshot under the lock so concurrent appends can't interleave mid-build.
    with _lock:
        data = list(_dataset)
    for d in data:
        lines.append(f'MESSAGE user """{d["user"]}"""')
        lines.append(f'MESSAGE assistant """{d["assistant"]}"""')
    return "\n".join(lines) + "\n"
| |
|
class Treinamento:
    """Collects chat interactions and periodically rebuilds a fine-tuned Ollama model.

    Interactions are appended both to the database and to a JSONL dataset on
    disk; once ``min_interactions`` are accumulated, a Modelfile is generated
    and sent to the local Ollama server to create ``MODEL_FINE``.
    """

    def __init__(self, db: Database, min_interactions: int = 25, interval_hours: int = 4):
        """
        Args:
            db: persistence layer for messages and embeddings.
            min_interactions: dataset size that triggers a training run.
            interval_hours: period of the background retraining loop.
        """
        self.db = db
        self.min_interactions = min_interactions
        self.interval = interval_hours * 3600
        self.thread = None
        # Set while a _treinar run is in flight, so registrar_interacao and the
        # background loop cannot spawn overlapping training runs.
        self._treinando = threading.Event()
        self.carregar_dataset()
        self.iniciar_loop()

    def carregar_dataset(self):
        """Load the persisted JSONL dataset into the module-level ``_dataset`` cache."""
        global _dataset
        if not os.path.exists(DATASET_PATH):
            return
        try:
            with open(DATASET_PATH, "r", encoding="utf-8") as f:
                _dataset = [json.loads(line) for line in f if line.strip()]
            logger.info(f"{len(_dataset)} kandandos carregados do dataset!")
        except Exception as e:
            logger.error(f"Erro ao carregar dataset: {e}")
            _dataset = []

    def iniciar_loop(self):
        """Start the periodic retraining thread if it is not already running."""
        if not self.thread or not self.thread.is_alive():
            self.thread = threading.Thread(target=self._loop, daemon=True)
            self.thread.start()
            logger.info("Loop de fine-tune iniciado!")

    def registrar_interacao(self, usuario, mensagem, resposta, numero, is_reply=False, mensagem_original=""):
        """Persist one user/assistant exchange and trigger training when due.

        Saves the message to the database, stores its embedding, appends the
        pair to the JSONL dataset (under the lock), and kicks off a background
        training run once the dataset reaches ``min_interactions``.
        """
        try:
            self.db.salvar_mensagem(usuario, mensagem, resposta, numero)

            texto = f"{mensagem} {resposta}".lower()
            embedding = gerar_embedding(texto)
            self.db.salvar_embedding(numero, mensagem, resposta, embedding, texto=texto)

            entry = {"user": mensagem.strip(), "assistant": resposta.strip()}
            with _lock:
                _dataset.append(entry)
                with open(DATASET_PATH, "a", encoding="utf-8") as f:
                    json.dump(entry, f, ensure_ascii=False)
                    f.write("\n")

            logger.info(f"Kandando salvo: {len(_dataset)} total")

            # _treinar itself refuses to run concurrently (self._treinando),
            # so reaching the threshold on many messages in a row is safe.
            if len(_dataset) >= self.min_interactions:
                threading.Thread(target=self._treinar, daemon=True).start()

        except Exception as e:
            logger.error(f"Erro ao registrar: {e}")

    def _treinar(self):
        """Generate a Modelfile from the dataset and ask Ollama to build MODEL_FINE."""
        # Skip if another training run is already in flight.
        if self._treinando.is_set():
            return
        if len(_dataset) < self.min_interactions:
            return
        self._treinando.set()
        logger.info(f"INICIANDO FINE-TUNE → {MODEL_FINE} com {len(_dataset)} kandandos")

        try:
            salvar_dataset()
            modelfile = criar_modelfile()
            # Kept on disk for debugging; the API call below sends the content inline.
            with open(MODelfile_PATH, "w", encoding="utf-8") as f:
                f.write(modelfile)

            try:
                # Ollama's /api/create expects a JSON body with the model name and
                # the Modelfile content — not a multipart file upload.
                resp = requests.post(
                    "http://localhost:11434/api/create",
                    json={"name": MODEL_FINE, "modelfile": modelfile},
                    timeout=600,
                )
                if resp.status_code == 200:
                    config.OLLAMA_MODEL = MODEL_FINE
                    logger.success(f"MODELO {MODEL_FINE} CRIADO COM SUCESSO!")
                else:
                    logger.error(f"Erro Ollama: {resp.status_code} {resp.text}")
            finally:
                # Remove the scratch Modelfile even when the request raises.
                if os.path.exists(MODelfile_PATH):
                    os.remove(MODelfile_PATH)
        except Exception as e:
            logger.error(f"Erro no fine-tune: {e}")
        finally:
            self._treinando.clear()

    def _loop(self):
        """Background loop: retrain every ``interval`` seconds once enough data exists."""
        while True:
            time.sleep(self.interval)
            if len(_dataset) >= self.min_interactions:
                self._treinar()