import json
import os
import time
from typing import Optional

from .rag import build_index, search  # NOTE(review): unused in this chunk; may be used elsewhere in the package
from .llm import BluLLM, make_messages

# Directory where interaction logs and generated fine-tuning data are stored.
LOG_DIR = ".config/self_improve"
os.makedirs(LOG_DIR, exist_ok=True)


def log_interaction(query: str, response: str,
                    feedback: Optional[str] = None,
                    metadata: Optional[dict] = None) -> None:
    """Append one query/response interaction to the JSONL log.

    Args:
        query: The user's query text.
        response: The model's response text.
        feedback: Optional user feedback; stored as "" when absent.
        metadata: Optional extra fields; stored as {} when absent.
    """
    entry = {
        "timestamp": int(time.time()),
        "query": query,
        "response": response,
        "feedback": feedback or "",
        "meta": metadata or {},
    }
    with open(os.path.join(LOG_DIR, "interactions.jsonl"), "a", encoding="utf-8") as f:
        # BUG FIX: was "\\n" (literal backslash + n), which produced invalid
        # JSONL — records were not newline-separated. A real newline is required.
        f.write(json.dumps(entry, ensure_ascii=False) + "\n")


def generate_synthetic_training(n: int = 50) -> str:
    """Use Blu to generate high-quality instruction-response pairs from logs.

    Reads the last *n* logged interactions, asks the LLM to rewrite each into
    an improved instruction/answer pair, and writes the results as JSONL
    suitable for QLoRA SFT.

    Args:
        n: Maximum number of recent log entries to process.

    Returns:
        Path to the generated JSONL file (written even when empty).
    """
    try:
        # BUG FIX: the original opened this file without closing it (no `with`),
        # leaking the handle. OSError (e.g. missing file) is the expected
        # failure mode; anything else should surface.
        with open(os.path.join(LOG_DIR, "interactions.jsonl"), "r", encoding="utf-8") as f:
            lines = f.read().strip().splitlines()
    except OSError:
        lines = []

    # Skip corrupt log lines instead of aborting the whole run on one bad record.
    examples = []
    for line in lines[-n:]:
        try:
            examples.append(json.loads(line))
        except json.JSONDecodeError:
            continue

    llm = BluLLM()
    out = []
    for ex in examples:
        prompt = f"Rewrite the user's query into a clear instruction and produce an improved answer with step-by-step reasoning. Query: {ex['query']}. Response: {ex['response']}."
        msgs = make_messages(prompt, system_text="You are an expert data annotator.")
        try:
            improved = llm.chat(msgs)
            out.append({"instruction": prompt, "input": "", "output": improved})
        except Exception:
            # Best-effort: an LLM failure on one example should not stop the batch.
            continue

    out_path = os.path.join(LOG_DIR, "synthetic_for_finetune.jsonl")
    with open(out_path, "w", encoding="utf-8") as f:
        for o in out:
            # Same newline fix as log_interaction: real "\n", not "\\n".
            f.write(json.dumps(o, ensure_ascii=False) + "\n")
    return out_path