"""
================================================================================
KASITBot — Training Platform
Lets you chat normally, flag wrong/incomplete answers, provide corrections,
and export the corrections as rag_dataset entries ready for re-indexing.
================================================================================
Run standalone:
    python trainer.py

Or mount into your existing app.py by importing and registering the blueprint:
    from trainer import trainer_bp
    app.register_blueprint(trainer_bp)

NOTE(review): no ``trainer_bp`` blueprint is defined in this module — all
routes are registered directly on the module-level ``app``. Confirm whether
the blueprint lives elsewhere before relying on the mounting instructions.

Then visit: http://localhost:5001/train
================================================================================
"""

import json
import os
import urllib.error
import urllib.request
import uuid
from datetime import datetime
from pathlib import Path

from flask import Flask, request, jsonify, render_template_string
from flask_cors import CORS

CORRECTIONS_FILE = Path("training_corrections.json")

app = Flask(__name__)
CORS(app)


# ══════════════════════════════════════════════════════════════════════════════
# Corrections Storage
# ══════════════════════════════════════════════════════════════════════════════

def load_corrections():
    """Return the stored corrections, or an empty list if the file is absent.

    Raises:
        json.JSONDecodeError: if the file exists but is not valid JSON. This is
            deliberately not swallowed: returning ``[]`` here would let the
            next save overwrite (and destroy) the damaged file.
    """
    if CORRECTIONS_FILE.exists():
        with open(CORRECTIONS_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    return []


def save_corrections(corrections):
    """Persist *corrections* to ``CORRECTIONS_FILE`` as pretty-printed JSON.

    The data is written to a sibling temporary file and then swapped in with
    ``os.replace`` so an interrupted write cannot leave a truncated file.
    """
    tmp_path = CORRECTIONS_FILE.with_name(CORRECTIONS_FILE.name + ".tmp")
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(corrections, f, ensure_ascii=False, indent=2)
    os.replace(tmp_path, CORRECTIONS_FILE)


def _clean_text(value):
    """Return *value* as a stripped string; ``None`` becomes ``""``."""
    if value is None:
        return ""
    return str(value).strip()


# ══════════════════════════════════════════════════════════════════════════════
# API Routes
# ══════════════════════════════════════════════════════════════════════════════

@app.route("/api/train/corrections", methods=["GET"])
def get_corrections():
    """Return every stored correction as a JSON array."""
    return jsonify(load_corrections())


@app.route("/api/train/corrections", methods=["POST"])
def add_correction():
    """Store a new correction posted as a JSON object.

    Required fields: ``question`` and ``correct_text``. Optional fields:
    ``bot_answer``, ``issue_type`` (default ``"wrong"``), ``source_hint`` and
    ``language`` (default ``"en"``).

    Returns:
        ``{"ok": True, "id": ..., "total": ...}`` on success, or a 400 response
        with an ``error`` message when the body is not a JSON object or a
        required field is empty.
    """
    data = request.get_json(force=True)
    # A JSON array/string/number has no .get(); reject it instead of crashing.
    if not isinstance(data, dict):
        return jsonify({"error": "request body must be a JSON object"}), 400

    entry = {
        "id": str(uuid.uuid4())[:8],
        "created_at": datetime.now().isoformat(),
        "question": _clean_text(data.get("question", "")),
        "bot_answer": _clean_text(data.get("bot_answer", "")),
        "issue_type": data.get("issue_type", "wrong"),  # wrong | partial | unknown
        "correct_text": _clean_text(data.get("correct_text", "")),
        "source_hint": _clean_text(data.get("source_hint", "")),  # optional: doc name
        "language": data.get("language", "en"),
        "exported": False,
    }
    if not entry["question"] or not entry["correct_text"]:
        return jsonify({"error": "question and correct_text are required"}), 400

    corrections = load_corrections()
    corrections.append(entry)
    save_corrections(corrections)
    return jsonify({"ok": True, "id": entry["id"], "total": len(corrections)})


# The <correction_id> URL variable is required: without it Flask calls the
# view with no arguments and every DELETE fails with a TypeError.
@app.route("/api/train/corrections/<correction_id>", methods=["DELETE"])
def delete_correction(correction_id):
    """Remove the correction whose ``id`` equals *correction_id*.

    Responds with ``{"ok": True}`` whether or not a matching entry existed.
    """
    remaining = [c for c in load_corrections() if c["id"] != correction_id]
    save_corrections(remaining)
    return jsonify({"ok": True})


@app.route("/api/train/export", methods=["GET"])
def export_for_rag():
    """
    Export pending corrections as rag_dataset.json entries.
    Each correction becomes a Q&A chunk that will be indexed.
    Format matches rag_preprocessor.py output exactly.

    The export file is written before the corrections are flagged as exported,
    so a failed write leaves them pending. When nothing is pending, an
    existing export file is left untouched rather than overwritten with ``[]``.
    """
    corrections = load_corrections()
    pending = [c for c in corrections if not c["exported"]]

    # Build a rich Q&A chunk — question + correct answer together
    # so the retriever finds it when the same question is asked again
    rag_entries = [
        {
            "text": f"Q: {c['question']}\nA: {c['correct_text']}",
            "source": c["source_hint"] or "training_corrections",
            "chunk_id": c["id"],
            "language": "Arabic" if c["language"] == "ar" else "English",
            "was_translated": False,
        }
        for c in pending
    ]

    export_path = Path("training_export.json")
    if pending or not export_path.exists():
        with open(export_path, "w", encoding="utf-8") as f:
            json.dump(rag_entries, f, ensure_ascii=False, indent=2)

    if pending:
        # Mark as exported (the dicts in `pending` are shared with `corrections`)
        for c in pending:
            c["exported"] = True
        save_corrections(corrections)

    return jsonify({
        "ok": True,
        "exported": len(rag_entries),
        "file": str(export_path),
        "entries": rag_entries,
    })


@app.route("/api/train/stats", methods=["GET"])
def stats():
    """Return counts of corrections by export state and by issue type."""
    corrections = load_corrections()
    return jsonify({
        "total": len(corrections),
        "pending": sum(1 for c in corrections if not c["exported"]),
        "exported": sum(1 for c in corrections if c["exported"]),
        "wrong": sum(1 for c in corrections if c["issue_type"] == "wrong"),
        "partial": sum(1 for c in corrections if c["issue_type"] == "partial"),
        "unknown": sum(1 for c in corrections if c["issue_type"] == "unknown"),
    })


# ══════════════════════════════════════════════════════════════════════════════
# Proxy to main KASITBot (adjust URL if app.py runs on different port)
# ══════════════════════════════════════════════════════════════════════════════

KASIT_BOT_URL = os.environ.get("KASIT_BOT_URL", "http://localhost:5000")


@app.route("/api/train/chat", methods=["POST"])
def proxy_chat():
    """Forward chat to the real KASITBot and return its response.

    Returns 502 when the upstream bot answers with an HTTP error or cannot be
    reached, and 500 for any other failure (e.g. a non-JSON upstream reply).
    """
    body = request.get_data()
    try:
        req = urllib.request.Request(
            f"{KASIT_BOT_URL}/api/chat",
            data=body,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=60) as resp:
            result = json.loads(resp.read())
        return jsonify(result)
    except urllib.error.HTTPError as e:
        # HTTPError subclasses URLError; handle it first so an upstream 4xx/5xx
        # is not misreported as the bot being unreachable.
        return jsonify({"error": f"KASITBot returned HTTP {e.code}: {e.reason}"}), 502
    except urllib.error.URLError as e:
        return jsonify({"error": f"KASITBot unreachable at {KASIT_BOT_URL}: {str(e)}"}), 502
    except Exception as e:
        return jsonify({"error": str(e)}), 500


# ══════════════════════════════════════════════════════════════════════════════
# Frontend
# ══════════════════════════════════════════════════════════════════════════════

HTML = r""" KASITBot — Training Platform
KASITBot Training Platform
total 0
pending 0
exported 0
Chat with KASITBot normally.
Flag wrong answers to add training data.
Correction Queue
"""


@app.route("/train")
@app.route("/")
def index():
    """Serve the training UI page."""
    return render_template_string(HTML)


if __name__ == "__main__":
    print("=" * 60)
    print(" KASITBot — Training Platform")
    print("=" * 60)
    print(f"\n KASITBot proxy target: {KASIT_BOT_URL}")
    print(" Corrections file: training_corrections.json")
    print(" Export file: training_export.json")
    print("\n Open: http://localhost:5001/train\n")
    # The server binds to all interfaces, so the interactive debugger (which
    # allows arbitrary code execution) must be opt-in: set FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true", "yes"}
    app.run(debug=debug_enabled, host="0.0.0.0", port=5001)