NguyenThanh1405 committed on
Commit
4cfe4fa
·
1 Parent(s): 2eebc40

Deploy CQL Chatbot (without large files)

Browse files
.gitignore ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python cache
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+
11
+ # Virtual Environment
12
+ .venv/
13
+ venv/
14
+ ENV/
15
+
16
+ # IDE
17
+ .vscode/
18
+ .idea/
19
+ *.swp
20
+ *.swo
21
+
22
+ # Environment variables (IMPORTANT: Don't upload API keys!)
23
+ .env
24
+
25
+ # Conversation history (user data)
26
+ conversation_history/
27
+
28
+ # Generated images (will be created on server)
29
+ generated_images/
30
+
31
+ # Model cache
32
+ .cache/
33
+
34
+ # Logs
35
+ *.log
36
+
37
+ # OS
38
+ .DS_Store
39
+ Thumbs.db
40
+
41
+ # Large model files (upload separately to HF)
42
+ # Uncomment if models are too large for git
43
+ # Agent_Diffusion/*.safetensors
44
+ # Conservative Q-learning/saved_agent_1/*.pth
45
+ # Conservative Q-learning/saved_agent_1/*.pkl
46
+
47
+ # Backup files
48
+ *_backup.py
49
+ *_gpt2.py
50
+ communication_agent_gemini_backup.py
51
+
52
+ # Antigravity artifacts
53
+ .gemini/
Agent_Diffusion/inference_v3.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import torch
3
+ from transformers import Swin2SRImageProcessor, Swin2SRForImageSuperResolution
4
+ from diffusers import StableDiffusionPipeline
5
+ import numpy as np
6
+ from PIL import Image, ImageEnhance, ImageOps
7
+ import random
8
+ # from safetensors.torch import load_file
9
+
10
+ from stable_diffusion import MiniDiffusionPipeline
11
+
12
+ # --- Cấu hình ---
13
+ #PROMPT = "beautiful woman with long braided hair, wearing a scarf, soft smile, looking down, detailed shading" #725562173
14
+ #PROMPT = "attractive woman, big lips, mouth slightly open, heavy makeup" #v5
15
+ #PROMPT = "The man is young and has sharp jawline, narrow eyes, thick eyebrows, and short black hair." #10, 11
16
+ #PROMPT = "She is elderly with deep smile lines, small eyes, and short curly gray hair." #13
17
+ #PROMPT = "This man is old and smiling, with gray beard and big nose"
18
+
19
+
20
+ PROMPT = "a baby"
21
+ SAVE_IMAGE_PATH = "./15.png"
22
+
23
+ UNET_SAFE_PATH = "./unet-mini.safetensors"
24
+ VAE_SAFE_PATH = "./vae-finetuned.safetensors"
25
+
26
+ BASE_MODEL_ID = "runwayml/stable-diffusion-v1-5"
27
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
28
+
29
+
30
+ TINY_UNET_CONFIG = {
31
+ "unet_block_out_channels": (128, 256, 512),
32
+ }
33
+
34
+ MODEL_ID = "caidas/swin2SR-classical-sr-x4-64"
35
+ print(f"Đang load model {MODEL_ID} từ Hugging Face...")
36
+ processor = Swin2SRImageProcessor.from_pretrained(MODEL_ID)
37
+ model = Swin2SRForImageSuperResolution.from_pretrained(MODEL_ID)
38
+ print("Load model thành công!")
39
+
40
+ model = model.to(DEVICE)
41
+
42
def upscale_image_pipeline(pil_image, contrast=1.3, sharpen=1.5, target_size=(512, 512)):
    """Upscale *pil_image* x4 with Swin2SR using a "canvas isolation" trick.

    The image is pasted into the centre of a large white canvas before
    super-resolution, so the model's border artifacts (mirrored edges) land
    on the throw-away white margin instead of on the real image.  The real
    region is then cropped back out, its last row/column is forced white to
    erase any residual seam, and the result is resized to *target_size* and
    contrast/sharpness enhanced.

    Falls back to a plain resize when the SR model is unavailable.
    """
    if model is None or processor is None:
        return pil_image.resize(target_size)

    # 1. Normalise input to an RGB numpy array.
    img_np = np.array(pil_image)
    if len(img_np.shape) == 2:
        img_np = cv2.cvtColor(img_np, cv2.COLOR_GRAY2RGB)

    h_orig, w_orig = img_np.shape[:2]

    # 2. Build the white canvas.  256 px covers the usual 128x128 inputs;
    #    for larger images grow the canvas so the paste offsets stay >= 0.
    #    (The original fixed 256 canvas silently corrupted bigger inputs:
    #    negative offsets produced wrong slices.)
    canvas_size = max(256, 2 * max(h_orig, w_orig))
    canvas = np.ones((canvas_size, canvas_size, 3), dtype=np.uint8) * 255

    # Centre the image on the canvas.
    y_offset = (canvas_size - h_orig) // 2
    x_offset = (canvas_size - w_orig) // 2
    canvas[y_offset:y_offset + h_orig, x_offset:x_offset + w_orig] = img_np

    # 3. Run x4 super-resolution on the whole canvas; edge artifacts now sit
    #    at the canvas border, far away from the real image.
    pil_canvas = Image.fromarray(canvas)
    inputs = processor(pil_canvas, return_tensors="pt").to(DEVICE)

    with torch.no_grad():
        outputs = model(**inputs)

    output_tensor = outputs.reconstruction.data.squeeze().float().cpu().clamp_(0, 1).numpy()
    output_tensor = np.moveaxis(output_tensor, 0, -1)  # CHW -> HWC
    output_canvas = (output_tensor * 255.0).round().astype(np.uint8)

    # 4. Crop the real image back out (all coordinates scale by 4).
    scale_factor = 4
    y_start = y_offset * scale_factor
    x_start = x_offset * scale_factor
    h_real = h_orig * scale_factor
    w_real = w_orig * scale_factor
    final_img = output_canvas[y_start:y_start + h_real, x_start:x_start + w_real]

    # 5. Hard fix: whiten the last row/column in case the model still leaves
    #    a faint seam there.  Harmless: these sketches sit on white paper.
    final_img[-1:, :, :] = 255
    final_img[:, -1:, :] = 255

    # 6. Guarantee the requested output size (cv2.resize takes (width, height)).
    if final_img.shape[:2] != target_size:
        final_img = cv2.resize(final_img, (target_size[1], target_size[0]), interpolation=cv2.INTER_LANCZOS4)

    # 7. Post-processing: contrast first, then sharpness.
    final_pil = Image.fromarray(final_img)
    enhancer = ImageEnhance.Contrast(final_pil)
    final_pil = enhancer.enhance(contrast)
    enhancer = ImageEnhance.Sharpness(final_pil)
    final_pil = enhancer.enhance(sharpen)

    return final_pil
114
+
115
@torch.no_grad()
def main():
    """Load the fine-tuned mini UNet + VAE, generate one image for PROMPT,
    upscale it with Swin2SR, and save both the upscaled and raw results."""
    print("--- Bắt đầu quá trình Inference (từ Safetensors) ---")

    # --- Build the component container ---
    print(f"Đang tải pipeline gốc từ {BASE_MODEL_ID}...")
    container = MiniDiffusionPipeline(
        base_model_id=BASE_MODEL_ID,
        device=DEVICE,
        config_overrides=TINY_UNET_CONFIG
    )

    # --- Load the trained weights ---
    # The checkpoints are plain state dicts written by torch.save(), so
    # weights_only=True is sufficient and safer (no arbitrary pickle code).
    print(f"Đang tải trọng số UNet từ {UNET_SAFE_PATH}...")
    try:
        unet_weights = torch.load(UNET_SAFE_PATH, map_location=DEVICE, weights_only=True)
        container.unet.load_state_dict(unet_weights)
    except Exception as e:
        print(f"LỖI: Không thể tải UNet state dict: {e}")
        print("Kiểm tra xem bạn đã bỏ chú thích 'config_overrides=TINY_UNET_CONFIG' chưa?")
        return

    print(f"Đang tải trọng số VAE từ {VAE_SAFE_PATH}...")
    try:
        vae_weights = torch.load(VAE_SAFE_PATH, map_location=DEVICE, weights_only=True)
        container.vae.load_state_dict(vae_weights)
    except Exception as e:
        print(f"LỖI: Không thể tải VAE state dict: {e}")
        return

    # --- Wrap our components in a standard StableDiffusionPipeline ---
    torch_dtype = torch.float16 if DEVICE.startswith("cuda") else torch.float32

    print("Đang tạo pipeline inference...")
    inference_pipeline = StableDiffusionPipeline(
        unet=container.unet,
        vae=container.vae,
        text_encoder=container.text_encoder,
        tokenizer=container.tokenizer,
        scheduler=container.noise_scheduler,
        safety_checker=None,        # research pipeline: no safety filtering
        feature_extractor=None,
    ).to(DEVICE)

    if DEVICE.startswith("cuda"):
        inference_pipeline.to(dtype=torch_dtype)  # fp16 on GPU for speed/VRAM

    inference_pipeline.set_progress_bar_config(disable=False)

    # --- Generate ---
    print(f"\nĐang tạo ảnh cho prompt: '{PROMPT}'")
    current_seed = random.randint(0, 2**32 - 1)
    print(f"Seed hiện tại: {current_seed}")

    # Seeded generator so a good result can be reproduced from the printed seed.
    generator = torch.Generator(device=DEVICE).manual_seed(current_seed)

    image = inference_pipeline(
        prompt=PROMPT,
        num_inference_steps=50,
        generator=generator,
        guidance_scale=7.5
    ).images[0]

    # Save the x4-upscaled image at SAVE_IMAGE_PATH and keep the raw pipeline
    # output next to it with an "_original" suffix.
    final_image = upscale_image_pipeline(image)
    final_image.save(SAVE_IMAGE_PATH)
    image.save(SAVE_IMAGE_PATH.replace(".png", "_original.png"))

    print(f"\n--- Hoàn thành! ---")
    print(f"Đã lưu ảnh tại: {SAVE_IMAGE_PATH}")

    try:
        image.show()  # best-effort preview; headless machines have no viewer
    except Exception:
        pass

if __name__ == "__main__":
    main()
Agent_Diffusion/stable_diffusion.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from diffusers.models import UNet2DConditionModel, AutoencoderKL
3
+ from diffusers.schedulers import DDPMScheduler
4
+ from transformers import CLIPTextModel, CLIPTokenizer
5
+ from typing import Dict, Any, Optional
6
+
7
class MiniDiffusionPipeline:
    """Container bundling a frozen CLIP text encoder, a fine-tunable VAE and
    a randomly initialised "mini" UNet, plus a DDPM noise scheduler.

    It is not a runnable diffusers pipeline itself; the training and
    inference scripts pull the components out (or wrap them in a
    ``StableDiffusionPipeline``).
    """

    # Default configuration: scheduler hyper-parameters, training knobs and
    # the mini-UNet architecture.  Individual keys can be overridden via
    # ``config_overrides`` in ``__init__``.
    DEFAULT_CONFIG: Dict[str, Any] = {
        "beta_schedule": "scaled_linear",
        "beta_start": 0.00085,
        "beta_end": 0.0120,
        "num_train_timesteps": 1000,
        "prediction_type": "epsilon",
        "variance_type": "fixed_small",
        "clip_sample": False,
        "rescale_betas_zero_snr": False,
        "timestep_spacing": "leading",
        "lr": 1e-4,
        "optimizer": "AdamW",
        "scheduler": "cosine",
        "ema_decay": 0.9999,
        "latent_scale": 0.18215,        # SD v1 latent scaling factor
        "text_embed_dim": 768,          # CLIP text-encoder hidden size
        "latent_channels": 4,
        "latent_downscale_factor": 8,   # VAE spatial downscale

        # --- Mini-UNet architecture ---
        "image_size": 128,
        "unet_block_out_channels": (256, 512, 1024),
        "unet_layers_per_block": 1,
        "unet_down_block_types": (
            "CrossAttnDownBlock2D",
            "CrossAttnDownBlock2D",
            "DownBlock2D",
        ),
        "unet_up_block_types": (
            "UpBlock2D",
            "CrossAttnUpBlock2D",
            "CrossAttnUpBlock2D",
        ),
        "unet_mid_block_type": "UNetMidBlock2DCrossAttn",
        "unet_attention_head_dim": 8,
    }

    def __init__(
        self,
        # Fixed default repo id: "stabilityai/stable-diffusion-v1-5" is not a
        # valid Hub repo; every caller in this project uses runwayml's repo.
        base_model_id: str = "runwayml/stable-diffusion-v1-5",
        vae_model_id: Optional[str] = None,
        device: str = "cpu",
        config_overrides: Optional[Dict[str, Any]] = None
    ):
        """Build all components.

        Args:
            base_model_id: Hub repo supplying tokenizer, text encoder and
                (by default) the VAE.
            vae_model_id: optional standalone VAE repo (e.g.
                "stabilityai/sd-vae-ft-mse"); when None, the base model's
                "vae" subfolder is used.
            device: torch device string ("cpu" / "cuda").
            config_overrides: per-key overrides merged over DEFAULT_CONFIG.
        """
        self.device = torch.device(device)

        self.config = {**self.DEFAULT_CONFIG, **(config_overrides or {})}

        print(f"Đang tải Tokenizer và Text Encoder (đã đóng băng) từ {base_model_id}...")
        self.tokenizer = self._load_tokenizer(base_model_id)
        self.text_encoder = self._load_text_encoder(base_model_id)

        # A standalone VAE repo has no "vae" subfolder; the base model does.
        _vae_id = vae_model_id or base_model_id
        _vae_subfolder = "vae" if vae_model_id is None else None
        print(f"Đang tải VAE (để fine-tune) từ {_vae_id}...")
        self.vae = self._load_vae(_vae_id, _vae_subfolder)

        print("Khởi tạo UNet-mini (với trọng số ngẫu nhiên)...")
        self.unet = self._load_mini_unet()

        print("Khởi tạo Noise Scheduler...")
        self.noise_scheduler = self._load_noise_scheduler()

        print("\n--- MiniDiffusionPipeline đã sẵn sàng! ---")
        self.print_model_stats()

    def _load_tokenizer(self, model_id: str) -> CLIPTokenizer:
        """Load the CLIP tokenizer from the base model repo."""
        return CLIPTokenizer.from_pretrained(model_id, subfolder="tokenizer")

    def _load_text_encoder(self, model_id: str) -> CLIPTextModel:
        """Load the CLIP text encoder, move it to the device, and freeze it."""
        model = CLIPTextModel.from_pretrained(model_id, subfolder="text_encoder")
        model.to(self.device)
        model.requires_grad_(False)  # the text encoder is never trained
        return model

    def _load_vae(self, model_id: str, subfolder: Optional[str]) -> AutoencoderKL:
        """Load the (trainable) VAE, optionally from a repo subfolder."""
        if subfolder:
            model = AutoencoderKL.from_pretrained(model_id, subfolder=subfolder)
        else:
            model = AutoencoderKL.from_pretrained(model_id)
        model.to(self.device)
        return model

    def _load_mini_unet(self) -> UNet2DConditionModel:
        """Construct the randomly initialised mini UNet from ``self.config``."""
        # Latent resolution = pixel resolution / VAE downscale (128 / 8 = 16).
        latent_size = self.config["image_size"] // self.config["latent_downscale_factor"]

        unet_config = {
            "sample_size": latent_size,
            "in_channels": self.config["latent_channels"],
            "out_channels": self.config["latent_channels"],
            "block_out_channels": self.config["unet_block_out_channels"],
            "layers_per_block": self.config["unet_layers_per_block"],
            "down_block_types": self.config["unet_down_block_types"],
            "up_block_types": self.config["unet_up_block_types"],
            "mid_block_type": self.config["unet_mid_block_type"],
            "cross_attention_dim": self.config["text_embed_dim"],
            "attention_head_dim": self.config["unet_attention_head_dim"],
        }

        model = UNet2DConditionModel(**unet_config)
        model.to(self.device)
        return model

    def _load_noise_scheduler(self) -> DDPMScheduler:
        """Build the DDPM scheduler; ``from_config`` ignores unrelated keys."""
        return DDPMScheduler.from_config(self.config)

    def print_model_stats(self):
        """Print trainable parameter counts (in millions) for UNet and VAE."""
        unet_params = sum(p.numel() for p in self.unet.parameters() if p.requires_grad)
        vae_params = sum(p.numel() for p in self.vae.parameters() if p.requires_grad)
        print(f" UNet-mini (để train): {unet_params / 1_000_000:.2f} triệu tham số")
        print(f" VAE (để fine-tune): {vae_params / 1_000_000:.2f} triệu tham số")

    def get_trainable_parameters(self) -> Dict[str, Any]:
        """Return the parameter iterators intended for optimisation."""
        return {
            "unet": self.unet.parameters(),
            "vae": self.vae.parameters()
        }
129
+
130
+
131
# --- SMOKE TEST BLOCK ---
def _run_smoke_test():
    """Exercise the three construction paths of MiniDiffusionPipeline."""
    print("--- Bắt đầu kiểm thử MiniDiffusionPipeline ---")

    device = "cuda" if torch.cuda.is_available() else "cpu"
    if device == "cpu":
        print("CẢNH BÁO: Không tìm thấy CUDA. Chạy trên CPU (sẽ chậm).")

    # Path 1: defaults — uses the SD-1.5 VAE from the base repo.
    print("\n--- Tải mặc định ---")
    default_pipe = MiniDiffusionPipeline(
        base_model_id="runwayml/stable-diffusion-v1-5",
        device=device,
    )

    # Path 2: standalone MSE-finetuned VAE.
    print("\n--- Tải VAE-MSE tùy chỉnh ---")
    mse_pipe = MiniDiffusionPipeline(
        base_model_id="runwayml/stable-diffusion-v1-5",
        vae_model_id="stabilityai/sd-vae-ft-mse",
        device=device,
    )

    # Path 3: config overrides (extra-small UNet, lowered LR).
    print("\n--- Ghi đè config (UNet siêu nhỏ) ---")
    override_pipe = MiniDiffusionPipeline(
        base_model_id="runwayml/stable-diffusion-v1-5",
        device=device,
        config_overrides={
            "unet_block_out_channels": (128, 256, 512),
            "lr": 5e-5,
        },
    )

    print("\n--- Kiểm thử thành công ---")
    print(f"Config LR của Pipeline 1: {default_pipe.config['lr']}")
    print(f"Config LR của Pipeline 3: {override_pipe.config['lr']}")


if __name__ == "__main__":
    _run_smoke_test()
Agent_Diffusion/train_unet.py ADDED
@@ -0,0 +1,331 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+ from torch.utils.data import DataLoader
4
+ from torch.optim import AdamW
5
+ from diffusers.optimization import get_scheduler
6
+ from diffusers import DDPMScheduler, StableDiffusionPipeline
7
+ from PIL import Image
8
+ import os
9
+ import time
10
+ import matplotlib.pyplot as plt
11
+ from tqdm import tqdm
12
+ import random
13
+
14
+ from stable_diffusion import MiniDiffusionPipeline
15
+ from dataset import SketchDataset
16
+
17
+
18
+ from torchmetrics.image.fid import FrechetInceptionDistance
19
+ from torchmetrics.multimodal.clip_score import CLIPScore
20
+ from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity
21
+
22
+
23
+ # --- Cấu hình ---
24
+ TRAIN_DATA_DIR = r"C:\Users\Admin\Desktop\scientific research\dataset\train"
25
+ VAL_DATA_DIR = r"C:\Users\Admin\Desktop\scientific research\dataset\val"
26
+ VAE_PATH = "./vae-finetuned.safetensors"
27
+ IMAGE_SIZE = 128
28
+ EPOCHS = 101
29
+ BATCH_SIZE = 16 * 5
30
+ LEARNING_RATE = 1e-4 * 5
31
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
32
+ SAVE_UNET_PATH = "./unet-mini.safetensors"
33
+
34
+ CHECKPOINT_PATH = "./unet_latest_checkpoint.pth"
35
+ NUM_INFERENCE_STEPS = 50
36
+
37
+ TINY_UNET_CONFIG = {
38
+ "unet_block_out_channels": (128, 256, 512),
39
+ }
40
+
41
def plot_metrics(history, filename="unet_metrics_plot_v1.png"):
    """Plot the training curves stored in *history* and save to *filename*.

    Expects lists under the keys 'train_loss', 'val_loss', 'fid', 'lpips'
    and 'clip_score'; draws them as a 2x2 grid (losses, FID, LPIPS, CLIP).
    """
    plt.rcParams.update({'font.size': 17})

    fig, axs = plt.subplots(2, 2, figsize=(15, 12))

    axs[0, 0].plot(history['train_loss'], label="Train Loss")
    axs[0, 0].plot(history['val_loss'], label="Validation Loss")
    axs[0, 0].set_title("Train vs Validation Loss")
    axs[0, 0].set_xlabel("Epoch")
    axs[0, 0].set_ylabel("MSE Loss")
    axs[0, 0].grid()
    axs[0, 0].legend()

    axs[0, 1].plot(history['fid'], label="FID", color='green')
    axs[0, 1].set_title("Fréchet Inception Distance (FID)")
    axs[0, 1].set_xlabel("Epoch")
    axs[0, 1].set_ylabel("FID (lower is better)")
    axs[0, 1].grid()
    axs[0, 1].legend()

    axs[1, 0].plot(history['lpips'], label="LPIPS", color='red')
    axs[1, 0].set_title("Learned Perceptual Image Patch Similarity (LPIPS)")
    axs[1, 0].set_xlabel("Epoch")
    axs[1, 0].set_ylabel("LPIPS (lower is better)")
    axs[1, 0].grid()
    axs[1, 0].legend()

    axs[1, 1].plot(history['clip_score'], label="CLIP Score", color='purple')
    axs[1, 1].set_title("CLIP Score")
    axs[1, 1].set_xlabel("Epoch")
    axs[1, 1].set_ylabel("CLIP Score (higher is better)")
    axs[1, 1].grid()
    axs[1, 1].legend()

    plt.tight_layout()
    plt.savefig(filename)
    plt.close(fig)  # release the figure so repeated calls don't leak memory
    # Bug fix: the message printed a literal "(unknown)" instead of the path.
    print(f"Đã lưu biểu đồ metrics tại {filename}")
78
+
79
def evaluate(
    eval_pipeline, gen, val_loader, metrics,
    unet, vae, text_encoder, scheduler,
    vae_scale_factor, num_inference_steps
):
    """Validate the UNet and score generated samples for one epoch.

    For every validation batch this (a) computes the DDPM noise-prediction
    MSE as the validation loss, and (b) generates images from the batch's
    prompts with ``eval_pipeline`` to update the FID / LPIPS / CLIP-score
    metrics.

    Args:
        eval_pipeline: StableDiffusionPipeline sharing the training modules.
        gen: seeded torch.Generator so samples are comparable across epochs.
        val_loader: DataLoader yielding "pixel_values" and "input_ids".
        metrics: dict of torchmetrics objects keyed "fid"/"lpips"/"clip_score".
        unet, vae, text_encoder, scheduler: the training components.
        vae_scale_factor: SD latent scaling factor (0.18215).
        num_inference_steps: sampling steps for image generation.

    Returns:
        dict with keys "val_loss", "fid", "lpips", "clip_score".
    """

    unet.eval()
    total_val_loss = 0.0

    # Metrics accumulate across calls; reset so each epoch stands alone.
    for metric in metrics.values():
        metric.reset()

    def to_uint8(images):
        # [-1, 1] floats -> [0, 255] uint8, as expected by FID / CLIPScore.
        images = (images.clamp(-1, 1) + 1) / 2
        images = (images * 255).type(torch.uint8)
        return images

    def to_lpips_format(images):
        # LPIPS expects floats clamped to [-1, 1].
        return images.clamp(-1, 1)

    pbar = tqdm(val_loader, desc="[Validation & Evaluation]")
    for batch in pbar:
        images = batch["pixel_values"].to(DEVICE)
        input_ids = batch["input_ids"].to(DEVICE)

        with torch.no_grad():
            # --- compute validation loss (noise-prediction MSE) ---
            latents = vae.encode(images).latent_dist.mean * vae_scale_factor
            noise = torch.randn_like(latents)
            timesteps = torch.randint(0, scheduler.config.num_train_timesteps, (latents.shape[0],), device=DEVICE)
            noisy_latents = scheduler.add_noise(latents, noise, timesteps)
            text_embeds = text_encoder(input_ids)[0]

            noise_pred = unet(noisy_latents, timesteps, text_embeds).sample
            val_loss = F.mse_loss(noise_pred, noise)
            total_val_loss += val_loss.item()

            # --- generate images (via eval_pipeline) ---
            # Recover the text prompts from the tokenized batch.
            prompts = eval_pipeline.tokenizer.batch_decode(input_ids, skip_special_tokens=True)

            generated_output = eval_pipeline(
                prompt=prompts,
                num_inference_steps=num_inference_steps,
                output_type="pt",
                generator=gen
            )

            # Pipeline images come back in [0, 1]; renormalise to [-1, 1]
            # to match the ground-truth tensors.
            generated_images = generated_output.images
            generated_images_norm = (generated_images * 2) - 1

            # --- update metrics ---
            gt_images_uint8 = to_uint8(images)
            gt_images_lpips = to_lpips_format(images)
            gen_images_uint8 = to_uint8(generated_images_norm)
            gen_images_lpips = to_lpips_format(generated_images_norm)

            metrics["fid"].update(gt_images_uint8, real=True)
            metrics["fid"].update(gen_images_uint8, real=False)
            metrics["lpips"].update(gt_images_lpips, gen_images_lpips)
            metrics["clip_score"].update(gen_images_uint8, prompts)

    # --- aggregate and return results ---
    results = {
        "val_loss": total_val_loss / len(val_loader),
        "fid": metrics["fid"].compute().item(),
        "lpips": metrics["lpips"].compute().item(),
        "clip_score": metrics["clip_score"].compute().item()
    }

    return results
149
+
150
def main():
    """Stage 2: train the mini UNet on top of the frozen, fine-tuned VAE.

    Resumes from CHECKPOINT_PATH when present, evaluates (val loss, FID,
    LPIPS, CLIP score) after every epoch, keeps the best-by-CLIP-score UNet
    at SAVE_UNET_PATH, and always writes a rolling checkpoint.
    """
    print("--- Giai đoạn 2: Huấn luyện UNet-mini ---")
    start_time_total = time.time()

    # Build the component container (mini-UNet config must match inference).
    pipeline = MiniDiffusionPipeline(
        base_model_id="runwayml/stable-diffusion-v1-5",
        device=DEVICE,
        config_overrides=TINY_UNET_CONFIG
    )

    # Load the VAE fine-tuned in stage 1; abort if it is missing.
    try:
        pipeline.vae.load_state_dict(torch.load(VAE_PATH, map_location=DEVICE))
        print(f"Tải VAE đã fine-tune thành công từ {VAE_PATH}")
    except Exception as e:
        print(f"Lỗi: Không thể tải VAE từ {VAE_PATH}. {e}")
        print("Vui lòng chạy train_vae.py trước!")
        return

    # Only the UNet is trained; VAE and text encoder stay frozen.
    pipeline.vae.requires_grad_(False)
    pipeline.text_encoder.requires_grad_(False)

    unet = pipeline.unet
    vae = pipeline.vae
    text_encoder = pipeline.text_encoder
    tokenizer = pipeline.tokenizer
    noise_scheduler = pipeline.noise_scheduler
    vae_scale_factor = pipeline.config['latent_scale']

    # Data loading
    train_dataset = SketchDataset(TRAIN_DATA_DIR, tokenizer, IMAGE_SIZE)
    val_dataset = SketchDataset(VAL_DATA_DIR, tokenizer, IMAGE_SIZE)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    print(f"Đã tải {len(train_dataset)} ảnh train và {len(val_dataset)} ảnh val.")

    # Build the evaluation pipeline once; it shares the training modules, so
    # it always reflects the latest weights without rebuilding per epoch.
    print("Khởi tạo Evaluation Pipeline (một lần)...")
    eval_pipeline = StableDiffusionPipeline(
        unet=unet,
        vae=vae,
        text_encoder=text_encoder,
        tokenizer=tokenizer,
        scheduler=noise_scheduler,
        safety_checker=None,
        feature_extractor=None,
    ).to(DEVICE)

    eval_pipeline.set_progress_bar_config(disable=True)

    eval_pipeline.unet.eval()
    eval_pipeline.vae.eval()
    eval_pipeline.text_encoder.eval()

    # Fixed seed so evaluation samples are comparable across epochs.
    gen = torch.Generator(device=DEVICE).manual_seed(42)

    optimizer = AdamW(unet.parameters(), lr=LEARNING_RATE)
    lr_scheduler = get_scheduler(
        name=pipeline.config['scheduler'],
        optimizer=optimizer,
        num_warmup_steps=500,
        num_training_steps=(len(train_loader) * EPOCHS),
    )

    metrics = {
        "fid": FrechetInceptionDistance(feature=64).to(DEVICE),
        "lpips": LearnedPerceptualImagePatchSimilarity(net_type='vgg').to(DEVICE),
        "clip_score": CLIPScore(model_name_or_path="openai/clip-vit-base-patch32").to(DEVICE)
    }

    start_epoch = 0
    history = {
        "train_loss": [], "val_loss": [],
        "fid": [], "lpips": [], "clip_score": []
    }
    best_clip_score = 0.0

    # Resume support: the checkpoint stores model/optimizer/scheduler state,
    # the metric history, and the best score so far.
    if os.path.exists(CHECKPOINT_PATH):
        print(f"Phát hiện checkpoint. Đang tải từ {CHECKPOINT_PATH}...")
        try:
            checkpoint = torch.load(CHECKPOINT_PATH, map_location=DEVICE)
            unet.load_state_dict(checkpoint['unet_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            lr_scheduler.load_state_dict(checkpoint['lr_scheduler_state_dict'])
            start_epoch = checkpoint['epoch']  # the *next* epoch to run
            history = checkpoint['history']
            best_clip_score = checkpoint['best_clip_score']
            print(f"Resume training từ epoch {start_epoch}")
        except Exception as e:
            print(f"Lỗi khi tải checkpoint: {e}. Bắt đầu lại từ đầu.")
            start_epoch = 0
            history = {k: [] for k in history}
            best_clip_score = 0.0
    else:
        print("Không tìm thấy checkpoint. Bắt đầu training từ đầu.")


    for epoch in range(start_epoch, EPOCHS):
        start_time_epoch = time.time()
        unet.train()
        epoch_train_loss = 0.0

        pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Train]")
        for batch in pbar:
            images = batch["pixel_values"].to(DEVICE)
            input_ids = batch["input_ids"].to(DEVICE)

            # Frozen encoders: no gradients through VAE / text encoder.
            with torch.no_grad():
                latents = vae.encode(images).latent_dist.mean * vae_scale_factor
                text_embeds = text_encoder(input_ids)[0]

            # Standard DDPM objective: predict the noise added at a random t.
            noise = torch.randn_like(latents)
            timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (latents.shape[0],), device=DEVICE)
            noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

            noise_pred = unet(noisy_latents, timesteps, text_embeds).sample
            loss = F.mse_loss(noise_pred, noise)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            lr_scheduler.step()

            epoch_train_loss += loss.item()
            pbar.set_postfix({"Loss": loss.item()})

        avg_train_loss = epoch_train_loss / len(train_loader)
        history["train_loss"].append(avg_train_loss)

        # --- per-epoch evaluation ---
        eval_results = evaluate(
            eval_pipeline, gen, val_loader, metrics,
            unet, vae, text_encoder, noise_scheduler,
            vae_scale_factor, NUM_INFERENCE_STEPS
        )

        history["val_loss"].append(eval_results["val_loss"])
        history["fid"].append(eval_results["fid"])
        history["lpips"].append(eval_results["lpips"])
        history["clip_score"].append(eval_results["clip_score"])

        epoch_time_min = (time.time() - start_time_epoch) / 60

        print(f"\n--- Epoch {epoch+1}/{EPOCHS} Results (Thời gian: {epoch_time_min:.2f} phút) ---")
        print(f" Train Loss: {avg_train_loss:.6f}")
        print(f" Val Loss: {eval_results['val_loss']:.6f}")
        print(f" LPIPS: {eval_results['lpips']:.4f} (↓)")
        print(f" FID: {eval_results['fid']:.4f} (↓)")
        print(f" CLIP Score: {eval_results['clip_score']:.4f} (↑)")

        # Keep the single best UNet by CLIP score.
        if eval_results['clip_score'] > best_clip_score:
            best_clip_score = eval_results['clip_score']
            torch.save(unet.state_dict(), SAVE_UNET_PATH)
            print(f"Đã lưu UNet *tốt nhất* mới tại {SAVE_UNET_PATH} (CLIP Score: {best_clip_score:.4f})")

        print(f"Đang lưu checkpoint cuối cùng tại {CHECKPOINT_PATH}...")
        checkpoint = {
            'epoch': epoch + 1,
            'unet_state_dict': unet.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'lr_scheduler_state_dict': lr_scheduler.state_dict(),
            'history': history,
            'best_clip_score': best_clip_score
        }
        torch.save(checkpoint, CHECKPOINT_PATH)


    total_time_min = (time.time() - start_time_total) / 60
    print(f"\n--- Hoàn thành Giai đoạn 2 ---")
    print(f"Tổng thời gian chạy (phiên này): {total_time_min:.2f} phút")
    print(f"UNet đã train (tốt nhất) được lưu tại: {SAVE_UNET_PATH}")

    if history['train_loss']:
        plot_metrics(history, "unet_metrics_plot_v1.png")
    else:
        print("Không có dữ liệu history để vẽ biểu đồ.")

if __name__ == "__main__":
    main()
Agent_Diffusion/train_vae.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+ from torch.utils.data import DataLoader
4
+ from torchvision import transforms
5
+ from torch.optim import AdamW
6
+ from PIL import Image
7
+ import os
8
+ import time
9
+ import matplotlib.pyplot as plt
10
+ from tqdm import tqdm
11
+
12
+
13
+ from stable_diffusion import MiniDiffusionPipeline
14
+ from dataset import SketchDataset
15
+
16
+ # --- Cấu hình ---
17
+ TRAIN_DATA_DIR = r"C:\Users\Admin\Desktop\scientific research\dataset\train"
18
+ VAL_DATA_DIR = r"C:\Users\Admin\Desktop\scientific research\dataset\val"
19
+ IMAGE_SIZE = 128
20
+ EPOCHS = 36
21
+ BATCH_SIZE = 16
22
+ LEARNING_RATE = 1e-5
23
+ SAVE_PATH = "vae-finetuned.safetensors"
24
+ CHECKPOINT_PATH = "vae_latest_checkpoint.pth"
25
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
26
+
27
def plot_losses(train_losses, val_losses, filename="vae_loss_plot_v1.png"):
    """Plot per-epoch train/validation loss curves and save to *filename*."""
    plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label="Train Loss")
    plt.plot(val_losses, label="Validation Loss")
    plt.title("VAE Fine-tuning Loss")
    plt.xlabel("Epoch")
    plt.ylabel("MSE Loss")
    plt.grid()
    plt.ylim(0.085, 0.12)  # fixed window so successive runs are comparable
    plt.legend()
    plt.savefig(filename)
    plt.close()  # release the figure so repeated calls don't leak memory
    # Bug fix: the message printed a literal "(unknown)" instead of the path.
    print(f"Đã lưu biểu đồ loss tại {filename}")
39
+
40
def main():
    """Stage 1: fine-tune the SD VAE (MSE variant) on the sketch dataset.

    Pure reconstruction objective (pixel MSE through encode/decode).
    Resumes from CHECKPOINT_PATH when present, keeps the best-by-val-loss
    weights at SAVE_PATH, and always writes a rolling checkpoint per epoch.
    """
    print("--- Giai đoạn 1: Fine-tuning VAE ---")
    pipeline = MiniDiffusionPipeline(
        base_model_id="runwayml/stable-diffusion-v1-5",
        vae_model_id="stabilityai/sd-vae-ft-mse",
        device=DEVICE
    )
    vae = pipeline.vae
    tokenizer = pipeline.tokenizer
    vae_scale_factor = pipeline.config['latent_scale']

    train_dataset = SketchDataset(TRAIN_DATA_DIR, tokenizer, IMAGE_SIZE)
    val_dataset = SketchDataset(VAL_DATA_DIR, tokenizer, IMAGE_SIZE)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    print(f"Đã tải {len(train_dataset)} ảnh train và {len(val_dataset)} ảnh val.")

    optimizer = AdamW(vae.parameters(), lr=LEARNING_RATE)

    start_epoch = 0
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')

    # Resume support.
    if os.path.exists(CHECKPOINT_PATH):
        print(f"Phát hiện checkpoint. Đang tải từ {CHECKPOINT_PATH}...")
        try:
            checkpoint = torch.load(CHECKPOINT_PATH, map_location=DEVICE)

            vae.load_state_dict(checkpoint['vae_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            start_epoch = checkpoint['epoch']  # this is the *next* epoch to run
            train_losses = checkpoint['train_losses']
            val_losses = checkpoint['val_losses']
            best_val_loss = checkpoint['best_val_loss']

            print(f"Resume training từ epoch {start_epoch}")
        except Exception as e:
            print(f"Lỗi khi tải checkpoint: {e}. Bắt đầu lại từ đầu.")
            start_epoch = 0
            train_losses = []
            val_losses = []
            best_val_loss = float('inf')
    else:
        print("Không tìm thấy checkpoint. Bắt đầu training từ đầu.")

    start_time = time.time()

    for epoch in range(start_epoch, EPOCHS):
        vae.train()
        epoch_train_loss = 0.0

        pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Train]")
        for batch in pbar:
            images = batch["pixel_values"].to(DEVICE)

            # Deterministic encode (posterior mean, no sampling), then decode;
            # the scale factor cancels out but mirrors the diffusion pipeline.
            posterior = vae.encode(images).latent_dist
            latents = posterior.mean * vae_scale_factor

            reconstructions = vae.decode(latents / vae_scale_factor).sample

            loss = F.mse_loss(reconstructions, images)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_train_loss += loss.item()
            pbar.set_postfix({"Loss": loss.item()})

        avg_train_loss = epoch_train_loss / len(train_loader)
        train_losses.append(avg_train_loss)

        # Validation pass: same reconstruction loss, no gradients.
        vae.eval()
        epoch_val_loss = 0.0
        with torch.no_grad():
            pbar_val = tqdm(val_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Val]")
            for batch in pbar_val:
                images = batch["pixel_values"].to(DEVICE)
                posterior = vae.encode(images).latent_dist
                latents = posterior.mean * vae_scale_factor
                reconstructions = vae.decode(latents / vae_scale_factor).sample
                loss = F.mse_loss(reconstructions, images)
                epoch_val_loss += loss.item()

        avg_val_loss = epoch_val_loss / len(val_loader)
        val_losses.append(avg_val_loss)

        print(f"Epoch {epoch+1}/{EPOCHS} - Train Loss: {avg_train_loss:.6f} - Val Loss: {avg_val_loss:.6f}")

        # Keep the single best VAE by validation loss.
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(vae.state_dict(), SAVE_PATH)
            print(f"Đã lưu VAE *tốt nhất* mới tại {SAVE_PATH} (Val Loss: {best_val_loss:.6f})")

        print(f"Đang lưu checkpoint cuối cùng tại {CHECKPOINT_PATH}...")
        checkpoint = {
            'epoch': epoch + 1,
            'vae_state_dict': vae.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_losses': train_losses,
            'val_losses': val_losses,
            'best_val_loss': best_val_loss
        }
        torch.save(checkpoint, CHECKPOINT_PATH)


    end_time = time.time()
    total_time_min = (end_time - start_time) / 60
    print(f"\n--- Hoàn thành Giai đoạn 1 ---")
    print(f"Tổng thời gian chạy (phiên này): {total_time_min:.2f} phút")
    print(f"VAE đã fine-tune (tốt nhất) được lưu tại: {SAVE_PATH}")

    if train_losses and val_losses:
        plot_losses(train_losses, val_losses, "vae_loss_plot.png")

if __name__ == "__main__":
    main()
Agent_Diffusion/unet-mini.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:291a53f1a95a6893a7c06d9e8cbe06667b9e2bd53bcbaa87636b509ba4529821
3
+ size 208784907
Agent_Diffusion/vae-finetuned.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd73806bd9362c4d5ad89e5017178db324d5e49d2efe9e0dd04c1a2871395b32
3
+ size 334713859
Conservative Q-learning/Agen1_training.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import json
3
+ import numpy as np
4
+ from torch.utils.data import DataLoader, Dataset
5
+ from transformers import T5Tokenizer, T5EncoderModel
6
+
7
+ # Import từ các file bạn đã cung cấp
8
+ from cql_agent import CQLAgent
9
+ from cql_utils import DataNormalizer
10
+
11
+ # --- 1. ĐỊNH NGHĨA DATASET ---
12
+ class SketchOfflineDataset(Dataset):
13
+ def __init__(self, file_path):
14
+ self.data = []
15
+ with open(file_path, 'r', encoding='utf-8') as f:
16
+ for line in f:
17
+ self.data.append(json.loads(line))
18
+
19
+ def __len__(self):
20
+ return len(self.data)
21
+
22
+ def __getitem__(self, idx):
23
+ item = self.data[idx]
24
+ return (
25
+ np.array(item['state'], dtype=np.float32),
26
+ item['action'],
27
+ item['reward'],
28
+ np.array(item['next_state'], dtype=np.float32),
29
+ item['done']
30
+ )
31
+
32
+ # --- 2. HÀM HUẤN LUYỆN ---
33
+ def train_agent_1(epoch):
34
+ print("🚀 Khởi động quá trình huấn luyện Agent Chính...")
35
+
36
+ # Cấu hình
37
+ FILE_PATH = "massive_diverse_sketch_dataset.json"
38
+ STATE_DIM = 768 # Tương ứng đầu ra t5-base
39
+ ACTION_DIM = 3 # 0: Chat, 1: Sketch, 2: Reject
40
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
41
+
42
+ # Load Dữ liệu
43
+ full_dataset = SketchOfflineDataset(FILE_PATH)
44
+ train_size = int(0.8 * len(full_dataset))
45
+ test_size = len(full_dataset) - train_size
46
+ train_dataset, _ = torch.utils.data.random_split(full_dataset, [train_size, test_size])
47
+ train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
48
+
49
+ # Khởi tạo Agent (Sử dụng Discrete CQL vì Action là 0, 1, 2)
50
+ agent = CQLAgent(state_dim=STATE_DIM, action_dim=ACTION_DIM, is_continuous=False, device=DEVICE)
51
+
52
+ # Chuẩn hóa dữ liệu (Normalization)
53
+ all_states = np.array([item['state'] for item in full_dataset.data])
54
+ agent.normalizer.fit(all_states)
55
+
56
+ # Vòng lặp huấn luyện
57
+ epochs = epoch
58
+ for epoch in range(epochs):
59
+ total_loss = 0
60
+ for batch in train_loader:
61
+ # Batch: states, actions, rewards, next_states, dones
62
+ metrics = agent.train_step(batch)
63
+ total_loss += metrics['critic_loss']
64
+
65
+ print(f"Epoch {epoch+1}/{epochs} | Critic Loss: {total_loss/len(train_loader):.4f}")
66
+
67
+ # Lưu mô hình
68
+ agent.save_model("saved_agent_1")
69
+ return agent
70
+
71
+ # --- 3. HÀM KIỂM THỬ (TEST) ---
72
+ class Agent1Inference:
73
+ def __init__(self, model_path="saved_agent_1"):
74
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
75
+
76
+ # Load T5 để phân tích prompt
77
+ self.tokenizer = T5Tokenizer.from_pretrained("t5-base")
78
+ self.encoder = T5EncoderModel.from_pretrained("t5-base").to(self.device)
79
+
80
+ # Load CQL để ra quyết định
81
+ self.agent = CQLAgent(state_dim=768, action_dim=3, is_continuous=False, device=self.device)
82
+ self.agent.load_model(model_path)
83
+
84
+ def get_action(self, text_prompt):
85
+ # Bước 1: T5 phân tích -> Embedding
86
+ inputs = self.tokenizer(text_prompt, return_tensors="pt", padding=True).to(self.device)
87
+ with torch.no_grad():
88
+ embedding = self.encoder(**inputs).last_hidden_state.mean(dim=1).cpu().numpy().flatten()
89
+
90
+ # Bước 2: CQL ra quyết định
91
+ action_idx = self.agent.select_action(embedding)
92
+
93
+ mapping = {0: "Kích hoạt Agent Giao tiếp", 1: "Kích hoạt Agent Vẽ ảnh (Sketch)", 2: "Từ chối/Yêu cầu làm rõ"}
94
+ return mapping[action_idx]
95
+
96
+ # --- CHẠY CHƯƠNG TRÌNH ---
97
+ if __name__ == "__main__":
98
+ # 1. Train
99
+ trained_agent = train_agent_1(1000)
100
+
101
+ # 2. Test thử mô hình đã lưu
102
+ print("\n--- BẮT ĐẦU TEST AGENT CHÍNH ---")
103
+ tester = Agent1Inference("saved_agent_1")
104
+
105
+ test_prompts = [
106
+ "Vẽ cho mình một bức chân dung cụ ông sketch",
107
+ "Chào bot, hôm nay bạn thế nào?",
108
+ "Hãy vẽ một bông hoa hồng bằng màu dầu rực rỡ",
109
+ "Kí họa nhanh khuôn mặt cô gái đang cười"
110
+ ]
111
+
112
+ for p in test_prompts:
113
+ action = tester.get_action(p)
114
+ print(f"User: {p} \n=> Agent 1 quyết định: {action}\n")
Conservative Q-learning/cql_agent.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.optim as optim
3
+ import torch.nn.functional as F
4
+ import numpy as np
5
+ import copy
6
+ import os
7
+ from cql_utils import MLP, TanhGaussianPolicy, DataNormalizer
8
+
9
+ class CQLAgent:
10
+ def __init__(
11
+ self,
12
+ state_dim,
13
+ action_dim,
14
+ device='cuda' if torch.cuda.is_available() else 'cpu',
15
+ is_continuous=True,
16
+ hidden_dim=256,
17
+ lr=3e-4,
18
+ cql_weight=1.0,
19
+ temp=1.0,
20
+ gamma=0.99,
21
+ tau=0.005
22
+ ):
23
+ self.state_dim = state_dim
24
+ self.action_dim = action_dim
25
+ self.device = torch.device(device)
26
+ self.is_continuous = is_continuous
27
+ self.cql_weight = cql_weight
28
+ self.temp = temp # logsumexp
29
+ self.gamma = gamma
30
+ self.tau = tau # update coefficient
31
+
32
+ self.normalizer = DataNormalizer(state_dim)
33
+
34
+ if self.is_continuous:
35
+ self.actor = TanhGaussianPolicy(state_dim, action_dim, hidden_dim).to(self.device)
36
+ self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=lr)
37
+
38
+ # Critic (Continuous: input = State + action)
39
+ # using 2 Critics to reduce overstimation (Double Q-learning)
40
+ self.critic_1 = MLP(state_dim + action_dim, 1, hidden_dim).to(self.device)
41
+ self.critic_2 = MLP(state_dim + action_dim, 1, hidden_dim).to(self.device)
42
+ self.target_critic_1 = copy.deepcopy(self.critic_1)
43
+ self.target_critic_2 = copy.deepcopy(self.critic_2)
44
+
45
+ else:
46
+ # Discrete: Actor is argmax Q, Critic is Q-network (Input = state, output = Q of actions)
47
+ self.critic_1 = MLP(state_dim, action_dim, hidden_dim).to(self.device)
48
+ self.target_critic_1 = copy.deepcopy(self.critic_1)
49
+ # we alo can use 2 network to more robust but now we just use 1 network to check and we can update it later
50
+
51
+ params = list(self.critic_1.parameters())
52
+ if self.is_continuous:
53
+ params += list(self.critic_2.parameters())
54
+ self.critic_optimizer = optim.Adam(params, lr=lr)
55
+
56
+ # auto fine-tune Alpha (Entropy) for SAC (just Continuous)
57
+ if self.is_continuous:
58
+ self.target_entropy = -action_dim
59
+ self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
60
+ self.alpha_optimizer = optim.Adam([self.log_alpha], lr=lr)
61
+
62
+ def select_action(self, state, evaluate=True):
63
+ if not isinstance(state, np.ndarray):
64
+ state = np.array(state)
65
+
66
+ state = self.normalizer.normalize(state)
67
+ state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
68
+
69
+ with torch.no_grad():
70
+ if self.is_continuous:
71
+ action, _ = self.actor(state)
72
+ return action.cpu().data.numpy().flatten()
73
+ else:
74
+ q_values = self.critic_1(state)
75
+ return q_values.argmax(dim=1).cpu().data.numpy().item()
76
+
77
+ def _compute_cql_loss(self, q_values_pred, states, actions):
78
+ if self.is_continuous:
79
+ batch_size = states.shape[0]
80
+ num_samples = 10
81
+
82
+ # Taking random samples
83
+ random_actions = torch.FloatTensor(batch_size * num_samples, self.action_dim).uniform_(-1, 1).to(self.device)
84
+
85
+ # Taking actions from current Policy
86
+ curr_actions, curr_log_pi = self.actor(states.repeat_interleave(num_samples, dim=0))
87
+
88
+ # Calculating Q for each these samples
89
+ states_repeated = states.repeat_interleave(num_samples, dim=0)
90
+
91
+ # grafting states and random actions and current actions
92
+ q1_rand = self.critic_1(torch.cat([states_repeated, random_actions], dim=1))
93
+ q1_curr = self.critic_1(torch.cat([states_repeated, curr_actions], dim=1))
94
+
95
+ # gathering: [batch, num_samples, 1]
96
+ q1_rand = q1_rand.view(batch_size, num_samples, 1)
97
+ q1_curr = q1_curr.view(batch_size, num_samples, 1)
98
+
99
+ # merging to calculating LogSumExp
100
+ cat_q1 = torch.cat([q1_rand, q1_curr], dim=1)
101
+
102
+ # CQL Loss 1: log(sum(exp(Q_ood)))
103
+ cql_loss_1 = torch.logsumexp(cat_q1 / self.temp, dim=1).mean() * self.temp
104
+ # CQL Loss 2: - Q_data (Maximizing Q-value of data samples)
105
+ cql_loss_2 = q_values_pred.mean()
106
+
107
+ return (cql_loss_1 - cql_loss_2) * self.cql_weight
108
+
109
+ else:
110
+ # q_values_pred shape: [batch, action_dim] (calculated for all action space)
111
+
112
+ # CQL Loss 1: log(sum(exp(Q_all))) - calculate for all action space
113
+ cql_loss_1 = torch.logsumexp(q_values_pred / self.temp, dim=1).mean() * self.temp
114
+
115
+ # CQL Loss 2: - Q_data (takeing Q-value at real action in batch)
116
+ # actions shape: [batch, 1]
117
+ q_data = q_values_pred.gather(1, actions.long())
118
+ cql_loss_2 = q_data.mean()
119
+
120
+ return (cql_loss_1 - cql_loss_2) * self.cql_weight
121
+
122
+ def train_step(self, batch):
123
+ states, actions, rewards, next_states, dones = batch
124
+
125
+ states = torch.FloatTensor(states).to(self.device)
126
+ actions = torch.FloatTensor(actions).to(self.device)
127
+ rewards = torch.FloatTensor(rewards).unsqueeze(1).to(self.device)
128
+ next_states = torch.FloatTensor(next_states).to(self.device)
129
+ dones = torch.FloatTensor(dones).unsqueeze(1).to(self.device)
130
+
131
+ # Update Critic (Q-Functions)
132
+ with torch.no_grad():
133
+ if self.is_continuous:
134
+ next_actions, next_log_pi = self.actor(next_states)
135
+ q1_target = self.target_critic_1(torch.cat([next_states, next_actions], dim=1))
136
+ q2_target = self.target_critic_2(torch.cat([next_states, next_actions], dim=1))
137
+ min_q_target = torch.min(q1_target, q2_target)
138
+
139
+ # Soft Actor-Critic Target (Having entropy)
140
+ alpha = self.log_alpha.exp()
141
+ q_target = rewards + (1 - dones) * self.gamma * (min_q_target - alpha * next_log_pi)
142
+ else:
143
+ # DQN Target
144
+ # Double DQN logic
145
+ q_next = self.target_critic_1(next_states)
146
+ max_q_next, _ = torch.max(q_next, dim=1, keepdim=True)
147
+ q_target = rewards + (1 - dones) * self.gamma * max_q_next
148
+
149
+ # calculating Q hiện tại
150
+ if self.is_continuous:
151
+ q1_pred = self.critic_1(torch.cat([states, actions], dim=1))
152
+ q2_pred = self.critic_2(torch.cat([states, actions], dim=1))
153
+ mse_loss = F.mse_loss(q1_pred, q_target) + F.mse_loss(q2_pred, q_target)
154
+
155
+ # adding CQL Loss
156
+ cql_loss = self._compute_cql_loss(q1_pred, states, actions) + \
157
+ self._compute_cql_loss(q2_pred, states, actions)
158
+ else:
159
+ q_all = self.critic_1(states)
160
+ q_pred = q_all.gather(1, actions.long())
161
+ mse_loss = F.mse_loss(q_pred, q_target)
162
+
163
+ # Thêm CQL Loss
164
+ cql_loss = self._compute_cql_loss(q_all, states, actions)
165
+
166
+ total_critic_loss = mse_loss + cql_loss
167
+
168
+ self.critic_optimizer.zero_grad()
169
+ total_critic_loss.backward()
170
+ self.critic_optimizer.step()
171
+
172
+ # Update Actor (just Continuous)
173
+ actor_loss_val = 0
174
+ if self.is_continuous:
175
+ new_actions, log_pi = self.actor(states)
176
+ q1_new = self.critic_1(torch.cat([states, new_actions], dim=1))
177
+ q2_new = self.critic_2(torch.cat([states, new_actions], dim=1))
178
+ min_q_new = torch.min(q1_new, q2_new)
179
+
180
+ # SAC Actor Loss: Maximize (Q - alpha * log_prob) -> Minimize (alpha * log_prob - Q)
181
+ actor_loss = (alpha * log_pi - min_q_new).mean()
182
+
183
+ self.actor_optimizer.zero_grad()
184
+ actor_loss.backward()
185
+ self.actor_optimizer.step()
186
+
187
+ # Update Alpha (Temperature)
188
+ alpha_loss = -(self.log_alpha * (log_pi + self.target_entropy).detach()).mean()
189
+ self.alpha_optimizer.zero_grad()
190
+ alpha_loss.backward()
191
+ self.alpha_optimizer.step()
192
+
193
+ actor_loss_val = actor_loss.item()
194
+
195
+ # Soft Update Target Networks ===
196
+ self._soft_update(self.critic_1, self.target_critic_1)
197
+ if self.is_continuous:
198
+ self._soft_update(self.critic_2, self.target_critic_2)
199
+
200
+ return {
201
+ "critic_loss": total_critic_loss.item(),
202
+ "cql_loss": cql_loss.item(),
203
+ "actor_loss": actor_loss_val
204
+ }
205
+
206
+ def _soft_update(self, local_model, target_model):
207
+ for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
208
+ target_param.data.copy_(self.tau * local_param.data + (1.0 - self.tau) * target_param.data)
209
+
210
+ def save_model(self, path):
211
+ os.makedirs(path, exist_ok=True)
212
+ state_dict = {
213
+ 'is_continuous': self.is_continuous,
214
+ 'critic_1': self.critic_1.state_dict(),
215
+ }
216
+ if self.is_continuous:
217
+ state_dict.update({
218
+ 'critic_2': self.critic_2.state_dict(),
219
+ 'actor': self.actor.state_dict(),
220
+ 'log_alpha': self.log_alpha
221
+ })
222
+
223
+ torch.save(state_dict, os.path.join(path, "cql_model.pth"))
224
+ self.normalizer.save(os.path.join(path, "normalizer.pkl"))
225
+ print(f"Model saved to {path}")
226
+
227
+ def load_model(self, path):
228
+ model_path = os.path.join(path, "cql_model.pth")
229
+ if not os.path.exists(model_path):
230
+ print("No model found!")
231
+ return
232
+
233
+ checkpoint = torch.load(model_path, map_location=self.device)
234
+ self.critic_1.load_state_dict(checkpoint['critic_1'])
235
+
236
+ if self.is_continuous and checkpoint['is_continuous']:
237
+ self.critic_2.load_state_dict(checkpoint['critic_2'])
238
+ self.actor.load_state_dict(checkpoint['actor'])
239
+ self.log_alpha = checkpoint['log_alpha']
240
+
241
+ self.normalizer.load(os.path.join(path, "normalizer.pkl"))
242
+ print(f"Model loaded from {path}")
Conservative Q-learning/cql_utils.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from torch.distributions import Normal
5
+ import numpy as np
6
+ import pickle
7
+ import os
8
+
9
+ class DataNormalizer:
10
+ def __init__(self, state_dim):
11
+ self.mean = np.zeros(state_dim)
12
+ self.std = np.zeros(state_dim)
13
+ self.std[self.std < 1e-6] = 1.0
14
+
15
+ def fit(self, states):
16
+ self.mean = np.mean(states, axis=0)
17
+ self.std = np.std(states, axis=0)
18
+
19
+ def normalize(self, states):
20
+ return (states - self.mean) / self.std
21
+
22
+ def denormalize(self, states):
23
+ return states * self.std + self.mean
24
+
25
+ def save(self, path):
26
+ with open(path, 'wb') as f:
27
+ pickle.dump({'mean': self.mean, 'std': self.std}, f)
28
+
29
+ def load(self, path):
30
+ with open(path, 'rb') as f:
31
+ data = pickle.load(f)
32
+ self.mean = data['mean']
33
+ self.std = data['std']
34
+
35
+ class MLP(nn.Module):
36
+ def __init__(self, input_dim, output_dim, hidden_dim=256, n_layers=2):
37
+ super().__init__()
38
+ layers = []
39
+ layers.append(nn.Linear(input_dim, hidden_dim))
40
+ layers.append(nn.ReLU())
41
+ for _ in range(n_layers - 1):
42
+ layers.append(nn.Linear(hidden_dim, hidden_dim))
43
+ layers.append(nn.ReLU())
44
+
45
+ layers.append(nn.Linear(hidden_dim, output_dim))
46
+ self.net = nn.Sequential(*layers)
47
+
48
+ def forward(self, x):
49
+ return self.net(x)
50
+
51
+ class TanhGaussianPolicy(nn.Module):
52
+ def __init__(self, state_dim, action_dim, hidden_dim=256):
53
+ super().__init__()
54
+ self.base = MLP(state_dim, hidden_dim, hidden_dim)
55
+ self.mu_head = nn.Linear(hidden_dim, action_dim)
56
+ self.log_std_head = nn.Linear(hidden_dim, action_dim)
57
+
58
+ def forward(self, state):
59
+ # x = self.base.net[:-1](state) # getting feature from MLP base
60
+ x = self.base(state)
61
+ x = F.relu(x)
62
+
63
+ mu = self.mu_head(x)
64
+ log_std = self.log_std_head(x)
65
+ log_std = torch.clamp(log_std, -20, 2)
66
+
67
+ std = torch.exp(log_std)
68
+ dist = Normal(mu, std)
69
+
70
+ # Reparameterization trick: a = mu + std * epsilon
71
+ x_t = dist.rsample()
72
+ action = torch.tanh(x_t) # force to [-1, 1]
73
+
74
+ #calculating log probability
75
+ log_prob = dist.log_prob(x_t)
76
+
77
+ log_prob -= torch.log(1 - action.pow(2) + 1e-6)
78
+ log_prob = log_prob.sum(1, keepdim=True)
79
+
80
+ return action, log_prob
81
+
82
+
Conservative Q-learning/saved_agent_1/cql_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8ff7062d0d7b6d0ff647071ef0d21ccb7121770d6c51aeb39ed8ad10f882758
3
+ size 1056877
Conservative Q-learning/saved_agent_1/normalizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac608983b3f70024d8ec8b7efd1e58239d70a56f0c926d091a6a5267b56adcb3
3
+ size 12491
app.py ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Streamlit App - Beautiful UI for CQL Chatbot System
3
+ """
4
+ import streamlit as st
5
+ import plotly.graph_objects as go
6
+ from datetime import datetime
7
+ import config
8
+ from chatbot_engine import CQLChatbot
9
+ from memory_manager import MemoryManager
10
+
11
+
12
+ # Page configuration
13
+ st.set_page_config(
14
+ page_title=config.APP_TITLE,
15
+ page_icon=config.APP_ICON,
16
+ layout="wide",
17
+ initial_sidebar_state="expanded"
18
+ )
19
+
20
+ # Custom CSS for beautiful Dark Galaxy UI
21
+ st.markdown("""
22
+ <style>
23
+ /* Main container - Dark Galaxy Background */
24
+ .main {
25
+ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
26
+ }
27
+
28
+ /* Chat container */
29
+ .stChatMessage {
30
+ background-color: rgba(30, 30, 46, 0.8);
31
+ border-radius: 16px;
32
+ padding: 18px;
33
+ margin: 12px 0;
34
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
35
+ backdrop-filter: blur(10px);
36
+ }
37
+
38
+ /* User message - Purple gradient */
39
+ .stChatMessage[data-testid="user-message"] {
40
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
41
+ color: white;
42
+ border: 1px solid rgba(255, 255, 255, 0.1);
43
+ }
44
+
45
+ /* Assistant message - Dark with blue accent */
46
+ .stChatMessage[data-testid="assistant-message"] {
47
+ background: linear-gradient(135deg, #2d3748 0%, #1a202c 100%);
48
+ border-left: 4px solid #667eea;
49
+ color: #e2e8f0;
50
+ }
51
+
52
+ /* Sidebar - Dark theme */
53
+ section[data-testid="stSidebar"] {
54
+ background: linear-gradient(180deg, #1a1a2e 0%, #16213e 100%);
55
+ border-right: 1px solid rgba(102, 126, 234, 0.2);
56
+ }
57
+
58
+ section[data-testid="stSidebar"] * {
59
+ color: #e2e8f0 !important;
60
+ }
61
+
62
+ /* Headers - Light text */
63
+ h1, h2, h3, h4, h5, h6 {
64
+ color: #f7fafc !important;
65
+ text-shadow: 0 2px 4px rgba(0, 0, 0, 0.3);
66
+ }
67
+
68
+ /* Paragraph text */
69
+ p, span, div {
70
+ color: #cbd5e0;
71
+ }
72
+
73
+ /* Buttons - Purple gradient */
74
+ .stButton>button {
75
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
76
+ color: white;
77
+ border: none;
78
+ border-radius: 12px;
79
+ padding: 12px 24px;
80
+ font-weight: 600;
81
+ transition: all 0.3s ease;
82
+ box-shadow: 0 4px 8px rgba(102, 126, 234, 0.3);
83
+ }
84
+
85
+ .stButton>button:hover {
86
+ transform: translateY(-2px);
87
+ box-shadow: 0 6px 16px rgba(102, 126, 234, 0.5);
88
+ }
89
+
90
+ /* Input box - Dark with glow */
91
+ .stTextInput>div>div>input, .stChatInput>div>div>input {
92
+ border-radius: 12px;
93
+ border: 2px solid rgba(102, 126, 234, 0.3);
94
+ padding: 14px;
95
+ background-color: rgba(30, 30, 46, 0.6);
96
+ color: #e2e8f0;
97
+ transition: all 0.3s ease;
98
+ }
99
+
100
+ .stTextInput>div>div>input:focus, .stChatInput>div>div>input:focus {
101
+ border-color: #667eea;
102
+ box-shadow: 0 0 12px rgba(102, 126, 234, 0.4);
103
+ }
104
+
105
+ /* Slider */
106
+ .stSlider>div>div>div {
107
+ background-color: rgba(102, 126, 234, 0.3);
108
+ }
109
+
110
+ /* Metrics - Dark cards */
111
+ .stMetric {
112
+ background: linear-gradient(135deg, #2d3748 0%, #1a202c 100%);
113
+ padding: 18px;
114
+ border-radius: 12px;
115
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
116
+ border: 1px solid rgba(102, 126, 234, 0.2);
117
+ }
118
+
119
+ .stMetric label {
120
+ color: #a0aec0 !important;
121
+ }
122
+
123
+ .stMetric [data-testid="stMetricValue"] {
124
+ color: #667eea !important;
125
+ }
126
+
127
+ /* Action badges - Glowing */
128
+ .action-badge {
129
+ display: inline-block;
130
+ padding: 6px 16px;
131
+ border-radius: 20px;
132
+ font-size: 0.85em;
133
+ font-weight: 600;
134
+ margin-left: 8px;
135
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3);
136
+ }
137
+
138
+ .action-0 {
139
+ background: linear-gradient(135deg, #4299e1 0%, #3182ce 100%);
140
+ color: white;
141
+ }
142
+
143
+ .action-1 {
144
+ background: linear-gradient(135deg, #9f7aea 0%, #805ad5 100%);
145
+ color: white;
146
+ }
147
+
148
+ .action-2 {
149
+ background: linear-gradient(135deg, #ed8936 0%, #dd6b20 100%);
150
+ color: white;
151
+ }
152
+
153
+ /* Divider */
154
+ hr {
155
+ border-color: rgba(102, 126, 234, 0.2);
156
+ }
157
+
158
+ /* Expander */
159
+ .streamlit-expanderHeader {
160
+ background-color: rgba(30, 30, 46, 0.6);
161
+ color: #e2e8f0 !important;
162
+ border-radius: 8px;
163
+ }
164
+
165
+ /* Caption text */
166
+ .stCaption {
167
+ color: #a0aec0 !important;
168
+ }
169
+
170
+ /* Success/Info/Warning boxes */
171
+ .stSuccess, .stInfo, .stWarning {
172
+ background-color: rgba(30, 30, 46, 0.8);
173
+ border-radius: 8px;
174
+ color: #e2e8f0;
175
+ }
176
+ </style>
177
+ """, unsafe_allow_html=True)
178
+
179
+
180
+ # Initialize session state
181
+ def init_session_state():
182
+ """Initialize Streamlit session state"""
183
+ # Initialize memory manager
184
+ if 'memory_manager' not in st.session_state:
185
+ st.session_state.memory_manager = MemoryManager()
186
+ st.session_state.memory_manager.create_new_session()
187
+
188
+ if 'chatbot' not in st.session_state:
189
+ with st.spinner('🚀 Đang khởi tạo CQL Chatbot System...'):
190
+ st.session_state.chatbot = CQLChatbot(
191
+ memory_manager=st.session_state.memory_manager
192
+ )
193
+
194
+ if 'messages' not in st.session_state:
195
+ st.session_state.messages = []
196
+
197
+ if 'action_history' not in st.session_state:
198
+ st.session_state.action_history = []
199
+
200
+
201
+ def display_action_badge(action: int, action_name: str):
202
+ """Display action badge with color coding"""
203
+ badge_html = f'<span class="action-badge action-{action}">{config.ACTION_DESCRIPTIONS[action]}</span>'
204
+ return badge_html
205
+
206
+
207
+ def create_action_distribution_chart(distribution: dict, chart_id: str = "action_dist"):
208
+ """Create a pie chart for action distribution"""
209
+ if not distribution or sum(distribution.values()) == 0:
210
+ return None
211
+
212
+ labels = list(distribution.keys())
213
+ values = list(distribution.values())
214
+
215
+ fig = go.Figure(data=[go.Pie(
216
+ labels=labels,
217
+ values=values,
218
+ hole=0.4,
219
+ marker=dict(colors=['#4A90E2', '#7b1fa2', '#f57c00']),
220
+ textinfo='label+percent',
221
+ textfont=dict(size=12)
222
+ )])
223
+
224
+ fig.update_layout(
225
+ title="Phân bố hành động",
226
+ height=300,
227
+ showlegend=True,
228
+ margin=dict(l=20, r=20, t=40, b=20),
229
+ # Add unique identifier
230
+ updatemenus=[],
231
+ sliders=[]
232
+ )
233
+
234
+ return fig
235
+
236
+
237
+ def create_q_values_chart(q_values: list, chart_id: str = "q_values"):
238
+ """Create a bar chart for Q-values"""
239
+ actions = [config.ACTION_DESCRIPTIONS[i] for i in range(len(q_values))]
240
+
241
+ fig = go.Figure(data=[go.Bar(
242
+ x=actions,
243
+ y=q_values,
244
+ marker=dict(
245
+ color=q_values,
246
+ colorscale='Viridis',
247
+ showscale=True
248
+ ),
249
+ text=[f'{v:.2f}' for v in q_values],
250
+ textposition='auto',
251
+ )])
252
+
253
+ fig.update_layout(
254
+ title="Q-Values (Giá trị hành động)",
255
+ xaxis_title="Hành động",
256
+ yaxis_title="Q-Value",
257
+ height=300,
258
+ showlegend=False,
259
+ margin=dict(l=20, r=20, t=40, b=20),
260
+ # Add unique identifier
261
+ updatemenus=[],
262
+ sliders=[]
263
+ )
264
+
265
+ return fig
266
+
267
+
268
+ def main():
269
+ """Main Streamlit app"""
270
+
271
+ # Initialize
272
+ init_session_state()
273
+
274
+ # Fixed temperature (không hiển thị cho user)
275
+ FIXED_TEMPERATURE = 0.7
276
+
277
+ # Header
278
+ st.title(f"{config.APP_ICON} Chatbot Wisdom")
279
+ st.markdown("### 🧠 Hệ thống Multi-Agent với Conservative Q-Learning")
280
+ st.markdown("---")
281
+
282
+ # Sidebar
283
+ with st.sidebar:
284
+ st.header(config.SIDEBAR_TITLE)
285
+
286
+ # New chat button
287
+ if st.button("🆕 Cuộc trò chuyện mới", use_container_width=True):
288
+ st.session_state.messages = []
289
+ st.session_state.action_history = []
290
+ st.session_state.chatbot.clear_history()
291
+ # Create new session in memory
292
+ st.session_state.memory_manager.create_new_session()
293
+ st.success("✅ Đã tạo cuộc trò chuyện mới!")
294
+ st.rerun()
295
+
296
+ st.divider()
297
+
298
+ # Session history
299
+ st.subheader("💾 Lịch sử hội thoại")
300
+ sessions = st.session_state.memory_manager.get_all_sessions()
301
+
302
+ if sessions:
303
+ st.caption(f"Tổng cộng: {len(sessions)} phiên")
304
+
305
+ # Show last 5 sessions
306
+ for session in sessions[:5]:
307
+ session_id = session['session_id']
308
+ created_at = datetime.fromisoformat(session['created_at']).strftime("%d/%m/%Y %H:%M")
309
+ msg_count = session['message_count']
310
+
311
+ col1, col2 = st.columns([3, 1])
312
+ with col1:
313
+ st.caption(f"📝 {created_at} ({msg_count} tin nhắn)")
314
+ with col2:
315
+ if st.button("🗑️", key=f"del_{session_id}"):
316
+ st.session_state.memory_manager.delete_session(session_id)
317
+ st.rerun()
318
+ else:
319
+ st.info("Chưa có lịch sử")
320
+
321
+ st.divider()
322
+
323
+ # Agent status
324
+ st.subheader("🤖 Trạng thái Agent")
325
+ st.success("✅ CQL Agent: Hoạt động")
326
+ st.success("✅ Communication Agent: Hoạt động")
327
+ st.success("✅ Drawing Agent: Hoạt động")
328
+
329
+ st.divider()
330
+
331
+ # Statistics
332
+ st.subheader("📊 Thống kê")
333
+ st.metric("Số tin nhắn", len(st.session_state.messages))
334
+
335
+ # Action distribution
336
+ if st.session_state.action_history:
337
+ distribution = st.session_state.chatbot.get_action_distribution()
338
+ fig = create_action_distribution_chart(distribution, chart_id=f"dist_{len(st.session_state.action_history)}")
339
+ if fig:
340
+ st.plotly_chart(fig, use_container_width=True, key=f"action_dist_{len(st.session_state.action_history)}")
341
+
342
+ st.divider()
343
+
344
+ # Info
345
+ st.subheader("ℹ️ Thông tin")
346
+ st.info(f"""
347
+ **Model**: CQL Agent
348
+ **State Dim**: {config.STATE_DIM}
349
+ **Actions**: {config.ACTION_DIM}
350
+ **Device**: {config.DEVICE}
351
+ """)
352
+
353
+ # Main chat area
354
+ chat_container = st.container()
355
+
356
+ # Display chat messages
357
+ with chat_container:
358
+ for idx, message in enumerate(st.session_state.messages):
359
+ with st.chat_message(message["role"]):
360
+ st.markdown(message["content"])
361
+
362
+ # Display action badge for assistant messages
363
+ if message["role"] == "assistant" and "action" in message:
364
+ action = message["action"]
365
+ action_name = message["action_name"]
366
+ st.markdown(
367
+ display_action_badge(action, action_name),
368
+ unsafe_allow_html=True
369
+ )
370
+
371
+ # Display Q-values chart
372
+ if "q_values" in message:
373
+ with st.expander("📊 Xem Q-Values"):
374
+ fig = create_q_values_chart(message["q_values"], chart_id=f"qval_{idx}")
375
+ st.plotly_chart(fig, use_container_width=True, key=f"q_values_{idx}")
376
+
377
+ # Display image if available
378
+ if message.get("image_path"):
379
+ st.image(message["image_path"])
380
+
381
+ # Chat input
382
+ if prompt := st.chat_input("💬 Nhập tin nhắn của bạn..."):
383
+ # Add user message
384
+ st.session_state.messages.append({"role": "user", "content": prompt})
385
+
386
+ # Display user message
387
+ with st.chat_message("user"):
388
+ st.markdown(prompt)
389
+
390
+ # Generate response với fixed temperature
391
+ with st.chat_message("assistant"):
392
+ with st.spinner("🤔 Đang suy nghĩ..."):
393
+ response_data = st.session_state.chatbot.chat(prompt, FIXED_TEMPERATURE)
394
+
395
+ # Display response
396
+ st.markdown(response_data['response'])
397
+
398
+ # Display action badge
399
+ st.markdown(
400
+ display_action_badge(response_data['action'], response_data['action_name']),
401
+ unsafe_allow_html=True
402
+ )
403
+
404
+ # Display Q-values
405
+ with st.expander("📊 Xem Q-Values"):
406
+ msg_idx = len(st.session_state.messages)
407
+ fig = create_q_values_chart(response_data['q_values'], chart_id=f"qval_new_{msg_idx}")
408
+ st.plotly_chart(fig, use_container_width=True, key=f"q_values_new_{msg_idx}")
409
+
410
+ # Display image if available
411
+ if response_data.get('image_path'):
412
+ st.image(response_data['image_path'])
413
+
414
+ # Add assistant message to history
415
+ st.session_state.messages.append({
416
+ "role": "assistant",
417
+ "content": response_data['response'],
418
+ "action": response_data['action'],
419
+ "action_name": response_data['action_name'],
420
+ "q_values": response_data['q_values'],
421
+ "image_path": response_data.get('image_path')
422
+ })
423
+
424
+ # Update action history
425
+ st.session_state.action_history.append(response_data['action'])
426
+
427
+ # Rerun to update UI
428
+ st.rerun()
429
+
430
+
431
+ if __name__ == "__main__":
432
+ main()
chatbot_engine.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CQL Chatbot Engine - Main chatbot logic integrating CQL agent with multi-agent system
3
+ """
4
+ import sys
5
+ import os
6
+ sys.path.append(os.path.join(os.path.dirname(__file__), 'Conservative Q-learning'))
7
+
8
+ import torch
9
+ import numpy as np
10
+ from transformers import T5Tokenizer, T5EncoderModel
11
+ from typing import Dict, List, Tuple
12
+ import config
13
+ from memory_manager import MemoryManager
14
+ from cql_agent import CQLAgent
15
+ from communication_agent import CommunicationAgent
16
+ from drawing_agent import DrawingAgent
17
+
18
+
19
class CQLChatbot:
    """Multi-agent chatbot driven by a Conservative Q-Learning (CQL) policy.

    Each user message is embedded with a T5 encoder; the trained CQL agent
    scores the embedding and selects an action (see config.ACTION_MAPPING):
    0 -> Communication Agent, 1 -> Drawing Agent, 2 -> Clarification.
    """

    def __init__(self, model_path: str = None, memory_manager: MemoryManager = None):
        """
        Initialize CQL Chatbot with all components.

        Args:
            model_path: Path to saved CQL model; defaults to config.MODEL_PATH.
            memory_manager: Memory manager instance for conversation storage
                (optional; when None, history is kept only in memory).
        """
        print("🚀 Initializing CQL Chatbot System...")

        # Set device (falls back to CPU when CUDA is unavailable)
        self.device = torch.device(config.DEVICE if torch.cuda.is_available() else 'cpu')
        print(f"📱 Using device: {self.device}")

        # Load T5 encoder used to turn text into state embeddings
        print("📚 Loading T5 encoder...")
        self.tokenizer = T5Tokenizer.from_pretrained(config.T5_MODEL_NAME)
        self.encoder = T5EncoderModel.from_pretrained(config.T5_MODEL_NAME).to(self.device)
        self.encoder.eval()  # inference only: disables dropout etc.
        print("✅ T5 encoder loaded")

        # Load CQL agent (the decision maker)
        print("🧠 Loading CQL agent...")
        self.cql_agent = CQLAgent(
            state_dim=config.STATE_DIM,
            action_dim=config.ACTION_DIM,
            is_continuous=False,
            device=self.device
        )
        self.cql_agent.load_model(model_path or str(config.MODEL_PATH))
        print("✅ CQL agent loaded")

        # Initialize sub-agents that actually produce responses/images
        print("👥 Initializing sub-agents...")
        self.communication_agent = CommunicationAgent()
        self.drawing_agent = DrawingAgent()
        print("✅ All agents initialized")

        # Optional persistent storage for the conversation
        self.memory_manager = memory_manager
        if self.memory_manager:
            print("💾 Memory manager enabled")

        # In-process rolling conversation history (list of role/content dicts)
        self.conversation_history: List[Dict] = []

        print("🎉 CQL Chatbot System ready!\n")

    def encode_text(self, text: str) -> np.ndarray:
        """
        Encode text into a single T5 embedding vector.

        Args:
            text: Input text

        Returns:
            1-D embedding of size config.STATE_DIM (768 for t5-base),
            obtained by mean-pooling the encoder's last hidden state.
        """
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512
        ).to(self.device)

        with torch.no_grad():
            outputs = self.encoder(**inputs)
            # Mean pooling over the sequence dimension -> one fixed-size vector
            embedding = outputs.last_hidden_state.mean(dim=1).cpu().numpy().flatten()

        return embedding

    def get_action(self, text: str) -> Tuple[int, np.ndarray]:
        """
        Get the CQL agent's decision for the input text.

        Args:
            text: User input text

        Returns:
            Tuple of (action_index, q_values) where q_values are the raw
            critic-1 scores for every action (used only for visualization).
        """
        embedding = self.encode_text(text)

        # Greedy action from the trained policy (no exploration)
        action = self.cql_agent.select_action(embedding, evaluate=True)

        # Re-run critic 1 on the normalized state to expose all Q-values
        state = self.cql_agent.normalizer.normalize(embedding)
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        with torch.no_grad():
            q_values = self.cql_agent.critic_1(state_tensor).cpu().numpy().flatten()

        return action, q_values

    def chat(self, user_message: str, temperature: float = 0.7) -> Dict:
        """
        Process one user message and generate a response.

        Args:
            user_message: User's input message
            temperature: Response creativity for the communication agent

        Returns:
            Dict with keys 'response', 'action', 'action_name',
            'q_values' (list of floats) and 'image_path' (None unless
            an image was generated).
        """
        # Let the CQL policy pick an action first
        action, q_values = self.get_action(user_message)
        action_name = config.ACTION_MAPPING[action]

        print(f"\n🤖 CQL Decision: {action_name} (Action {action})")
        print(f"📊 Q-values: {q_values}")

        response_text = ""
        image_path = None

        # Keyword override: explicit drawing requests always go to the
        # Drawing Agent regardless of what the CQL policy suggested.
        drawing_keywords = ['vẽ', 'sketch', 'phác thảo', 'hình', 'ảnh', 'tranh', 'draw', 'paint', 'create image', 'generate']
        is_drawing_request = any(keyword in user_message.lower() for keyword in drawing_keywords)
        if is_drawing_request:
            print("🎨 Drawing keywords detected! Forcing Drawing Agent.")
            action = 1
            action_name = config.ACTION_MAPPING[1]

        # Execute the finally-selected action
        if action == 0:  # Communication Agent
            response_text = self.communication_agent.generate_response(
                user_message,
                self.conversation_history,
                temperature
            )

        elif action == 1:  # Drawing Agent
            response_text, image_path = self.drawing_agent.generate_sketch(user_message)

        elif action == 2:  # Clarification - deliberately falls back to Communication
            print("⚠️ CQL suggested Clarification. Using Communication Agent.")
            response_text = self.communication_agent.generate_response(
                user_message,
                self.conversation_history,
                temperature
            )
            # Report the exchange as a Communication action downstream
            action = 0
            action_name = config.ACTION_MAPPING[0]

        # Record the exchange in the rolling in-memory history
        self.conversation_history.append({
            'role': 'user',
            'content': user_message
        })
        self.conversation_history.append({
            'role': 'assistant',
            'content': response_text,
            'action': action,
            'action_name': action_name
        })

        # Trim to the configured window so the prompt context stays bounded
        if len(self.conversation_history) > config.MAX_HISTORY_LENGTH:
            self.conversation_history = self.conversation_history[-config.MAX_HISTORY_LENGTH:]

        # Persist to disk when a memory manager is attached
        if self.memory_manager:
            self.memory_manager.save_message('user', user_message)
            self.memory_manager.save_message(
                'assistant',
                response_text,
                {
                    'action': action,
                    'action_name': action_name,
                    'q_values': q_values.tolist()
                }
            )

        return {
            'response': response_text,
            'action': action,
            'action_name': action_name,
            'q_values': q_values.tolist(),
            'image_path': image_path
        }

    def _generate_clarification_request(self, user_message: str) -> str:
        """Return a random clarification prompt for an unclear message.

        NOTE(review): currently unused — chat() maps the Clarification
        action onto the Communication Agent instead of calling this.
        """
        import random  # stdlib; hoisted to the top of the method for clarity

        clarifications = [
            f"Xin lỗi, tôi chưa hiểu rõ yêu cầu của bạn: '{user_message}'. Bạn có thể nói rõ hơn được không?",
            f"Tôi cần thêm thông tin để hiểu câu hỏi của bạn. Bạn muốn tôi làm gì với: '{user_message}'?",
            f"Câu hỏi của bạn chưa rõ ràng. Bạn có thể diễn đạt lại không?",
            f"Hmm, tôi không chắc bạn đang hỏi gì. Bạn có thể cung cấp thêm chi tiết không?"
        ]
        return random.choice(clarifications)

    def clear_history(self):
        """Clear the in-memory conversation history (disk sessions untouched)."""
        self.conversation_history = []
        print("🗑️ Conversation history cleared")

    def get_action_distribution(self) -> Dict[str, int]:
        """Count how often each action was taken in the current conversation.

        Returns:
            Mapping of action name -> count, pre-seeded with zeros for every
            action in config.ACTION_MAPPING.
        """
        distribution = {name: 0 for name in config.ACTION_MAPPING.values()}

        for msg in self.conversation_history:
            if msg.get('role') == 'assistant' and 'action_name' in msg:
                action_name = msg['action_name']
                # .get guards against names outside ACTION_MAPPING, should
                # the stored history ever come from an older configuration
                distribution[action_name] = distribution.get(action_name, 0) + 1

        return distribution
communication_agent.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Communication Agent - Gemini API Version (Stable)
3
+ Backup GPT-2 version available in communication_agent_gpt2.py
4
+ """
5
+ import google.generativeai as genai
6
+ from typing import List, Dict
7
+ import os
8
+ import config
9
+
10
class CommunicationAgent:
    """Conversational sub-agent backed by Google's Gemini API."""

    def __init__(self, api_key: str = None):
        """Initialize the agent; disabled gracefully when no API key is set."""
        self.api_key = api_key or os.getenv("GEMINI_API_KEY", "")

        self.enabled = bool(self.api_key)
        if self.enabled:
            genai.configure(api_key=self.api_key)
            self.model = genai.GenerativeModel(config.GEMINI_MODEL)
            print("✅ Communication Agent (Gemini) ready!")
        else:
            self.model = None
            print("⚠️ Warning: GEMINI_API_KEY not set. Please add it to .env file")

        # System instructions prepended to every prompt for consistent tone
        self.system_context = (
            "You are a helpful, friendly AI assistant. "
            "Respond naturally and conversationally. "
            "Keep responses concise (2-3 sentences). "
            "Be warm and engaging. "
            "If you don't understand, ask for clarification politely."
        )

    def generate_response(
        self,
        user_message: str,
        conversation_history: List[Dict] = None,
        temperature: float = 0.7
    ) -> str:
        """
        Generate a conversational reply via Gemini.

        Args:
            user_message: User's input message
            conversation_history: Previous conversation context
            temperature: Response creativity (0.0-1.0)

        Returns:
            Generated response text (or an error string on failure).
        """
        if not self.enabled:
            return "⚠️ Gemini API not configured. Please add GEMINI_API_KEY to .env file."

        try:
            history_block = self._build_context(conversation_history)

            prompt = f"""{self.system_context}

{history_block}

User: {user_message}
Assistant:"""

            # Deliberately bounded output; errors are reported, not raised
            reply = self.model.generate_content(
                prompt,
                generation_config={
                    'temperature': temperature,
                    'top_p': 0.95,
                    'top_k': 40,
                    'max_output_tokens': 200,
                },
            )
            return reply.text.strip()

        except Exception as e:
            return f"Sorry, an error occurred: {str(e)}"

    def _build_context(self, conversation_history: List[Dict] = None) -> str:
        """Render the most recent turns of history as a prompt prefix."""
        if not conversation_history:
            return ""

        lines = ["Previous conversation:"]
        # Only the five most recent messages are included for context
        for entry in conversation_history[-5:]:
            speaker = "User" if entry.get('role') == 'user' else "Assistant"
            lines.append(f"{speaker}: {entry.get('content', '')}")
        return "\n".join(lines) + "\n"
config.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration file for CQL Chatbot System
3
+ """
4
+ import os
5
+ from pathlib import Path
6
+ from dotenv import load_dotenv
7
+
8
+ # Load environment variables
9
+ load_dotenv()
10
+
11
+ # Project paths
12
+ PROJECT_ROOT = Path(__file__).parent
13
+ MODEL_PATH = PROJECT_ROOT / "Conservative Q-learning" / "saved_agent_1"
14
+
15
+ # Model configuration
16
+ STATE_DIM = 768 # T5-base embedding dimension
17
+ ACTION_DIM = 3 # 0: Chat, 1: Sketch, 2: Clarify
18
+ DEVICE = os.getenv("DEVICE", "cuda")
19
+
20
+ # Action mapping
21
+ ACTION_MAPPING = {
22
+ 0: "Communication Agent",
23
+ 1: "Drawing Agent",
24
+ 2: "Clarification"
25
+ }
26
+
27
+ ACTION_DESCRIPTIONS = {
28
+ 0: "💬 Trò chuyện thông thường",
29
+ 1: "🎨 Vẽ sketch/hình ảnh",
30
+ 2: "❓ Cần làm rõ thêm"
31
+ }
32
+
33
+ # T5 Model
34
+ T5_MODEL_NAME = "t5-base"
35
+
36
+ # Gemini API (Primary - Stable)
37
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
38
+ GEMINI_MODEL = "gemini-2.5-flash"
39
+
40
+ # GPT-2 Model (Backup - Local, available in communication_agent_gpt2.py)
41
+ GPT2_MODEL_NAME = "gpt2" # Can use "gpt2-medium" or "gpt2-large" for better quality
42
+
43
+ # Streamlit UI Configuration
44
+ APP_TITLE = "🤖 Chatbot Wisdom - CQL Multi-Agent System"
45
+ APP_ICON = "🧠"
46
+ SIDEBAR_TITLE = "⚙️ Cài đặt"
47
+
48
+ # Chat settings
49
+ MAX_HISTORY_LENGTH = 50
50
+ DEFAULT_TEMPERATURE = 0.7
51
+ DEFAULT_MAX_TOKENS = 512
drawing_agent.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Drawing Agent - Integrated with Stable Diffusion Model
3
+ Uses pre-trained Diffusion model from Agent_Diffusion folder
4
+ """
5
+ import sys
6
+ import os
7
+ sys.path.append(os.path.join(os.path.dirname(__file__), 'Agent_Diffusion'))
8
+
9
+ import torch
10
+ from typing import Optional, Tuple
11
+ from pathlib import Path
12
+ import random
13
+ from PIL import Image
14
+ from diffusers import StableDiffusionPipeline
15
+ from stable_diffusion import MiniDiffusionPipeline
16
+
17
+
18
class DrawingAgent:
    """Sub-agent that turns text prompts into images with a mini Stable Diffusion.

    The heavy diffusion pipeline is lazily loaded on first use so that
    constructing the agent stays cheap at app startup.
    """

    def __init__(self):
        """Initialize Drawing Agent (model itself loads lazily)."""
        self.enabled = True
        self.output_dir = Path("generated_images")
        self.output_dir.mkdir(exist_ok=True)

        # Model paths (checkpoints live next to the app in Agent_Diffusion/)
        self.base_model_id = "runwayml/stable-diffusion-v1-5"
        self.unet_path = Path("Agent_Diffusion/unet-mini.safetensors")
        self.vae_path = Path("Agent_Diffusion/vae-finetuned.safetensors")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Shrunken UNet configuration applied when the mini pipeline is built
        self.tiny_unet_config = {
            "unet_block_out_channels": (128, 256, 512),
        }

        # Lazy loading - only load when first needed
        self.pipeline = None
        self.model_loaded = False

        print("✅ Drawing Agent initialized (Diffusion model will load on first use)")

    def _load_model(self):
        """Load the diffusion pipeline (lazy; sets self.enabled=False on failure)."""
        if self.model_loaded:
            return

        try:
            print("🎨 Loading Diffusion model...")

            # FIX: .safetensors checkpoints must be read with safetensors —
            # torch.load expects pickle/zip checkpoints and fails on this
            # format. Imported lazily to keep module import light.
            from safetensors.torch import load_file

            # Fail early with a clear message when checkpoints are missing
            for path in (self.unet_path, self.vae_path):
                if not path.exists():
                    raise FileNotFoundError(f"Missing model file: {path}")

            # Build the mini pipeline container (project-local helper)
            container = MiniDiffusionPipeline(
                base_model_id=self.base_model_id,
                device=self.device,
                config_overrides=self.tiny_unet_config
            )

            # Load fine-tuned UNet / VAE weights
            print(f"Loading UNet from {self.unet_path}...")
            container.unet.load_state_dict(load_file(str(self.unet_path), device=self.device))

            print(f"Loading VAE from {self.vae_path}...")
            container.vae.load_state_dict(load_file(str(self.vae_path), device=self.device))

            # Assemble a standard diffusers inference pipeline
            torch_dtype = torch.float16 if self.device == "cuda" else torch.float32

            self.pipeline = StableDiffusionPipeline(
                unet=container.unet,
                vae=container.vae,
                text_encoder=container.text_encoder,
                tokenizer=container.tokenizer,
                scheduler=container.noise_scheduler,
                safety_checker=None,
                feature_extractor=None,
            ).to(self.device)

            if self.device == "cuda":
                self.pipeline.to(dtype=torch_dtype)

            self.pipeline.set_progress_bar_config(disable=True)
            self.model_loaded = True

            print("✅ Diffusion model loaded successfully!")

        except Exception as e:
            # Degrade to the text-only fallback rather than crashing the app
            print(f"❌ Error loading Diffusion model: {e}")
            self.enabled = False

    def generate_sketch(self, prompt: str) -> Tuple[str, Optional[str]]:
        """
        Generate a sketch based on the prompt using the diffusion model.

        Args:
            prompt: Description of what to draw

        Returns:
            Tuple of (response_text, image_path); image_path is None when
            generation failed or the model is unavailable.
        """
        try:
            # Load model if not loaded
            if not self.model_loaded:
                self._load_model()

            if not self.enabled or self.pipeline is None:
                return self._fallback_response(prompt)

            # Strip filler words to get the core subject
            clean_prompt = self.parse_sketch_request(prompt)

            print(f"🎨 Generating image for: '{clean_prompt}'")

            # Random seed makes each request reproducible via its reported seed
            current_seed = random.randint(0, 2**32 - 1)
            generator = torch.Generator(device=self.device).manual_seed(current_seed)

            with torch.no_grad():
                image = self.pipeline(
                    prompt=clean_prompt,
                    num_inference_steps=50,
                    generator=generator,
                    guidance_scale=7.5
                ).images[0]

            # FIX: derive the filename from the seed — the old 4-digit random
            # name could silently overwrite earlier images on collision.
            image_path = self.output_dir / f"sketch_{current_seed}.png"
            image.save(str(image_path))

            print(f"✅ Image saved to: {image_path}")

            # FIX: real newlines instead of literal "\n" text in the markdown
            response_text = (
                "🎨 **Image Generated!**\n\n"
                f"Prompt: **{clean_prompt}**\n"
                f"Seed: {current_seed}\n\n"
                "Here's your generated image:"
            )

            return response_text, str(image_path)

        except Exception as e:
            error_msg = f"Sorry, I encountered an error while generating the image: {str(e)}"
            print(f"❌ Drawing error: {e}")
            return error_msg, None

    def _fallback_response(self, prompt: str) -> Tuple[str, None]:
        """Text-only fallback used when the diffusion model cannot load."""
        clean_prompt = self.parse_sketch_request(prompt)

        response_text = (
            "🎨 **Drawing Request Received**\n\n"
            f"I understand you want me to draw: **{clean_prompt}**\n\n"
            "⚠️ **Note**: Diffusion model failed to load. "
            "Please check that the model files exist in Agent_Diffusion folder."
        )

        return response_text, None

    def parse_sketch_request(self, user_message: str) -> str:
        """
        Extract the core drawing subject from a request.

        FIX: keywords are removed as whole words only. The previous plain
        str.replace() also deleted keyword letters inside other words
        (e.g. removing 'a' turned "cat" into "ct").

        Args:
            user_message: User's request message

        Returns:
            Cleaned prompt for image generation (original message when the
            cleaned version is empty or shorter than 3 characters).
        """
        import re  # local import: the module does not import re at top level

        # Multi-word phrases first so they are matched before their parts
        drawing_keywords = [
            'phác thảo', 'cho tôi', 'cho mình', 'giúp tôi', 'help me', 'for me',
            'vẽ', 'sketch', 'hình', 'ảnh', 'tranh',
            'draw', 'paint', 'create', 'make', 'generate',
            'một', 'a', 'an', 'the'
        ]

        prompt = user_message.lower()

        # Remove keywords as whole words (unicode-aware boundaries)
        for keyword in drawing_keywords:
            prompt = re.sub(r'\b' + re.escape(keyword) + r'\b', ' ', prompt)

        # Collapse extra whitespace
        prompt = ' '.join(prompt.split()).strip()

        # If (nearly) empty after cleaning, fall back to the original message
        if not prompt or len(prompt) < 3:
            prompt = user_message

        return prompt

    def is_drawing_request(self, user_message: str) -> bool:
        """
        Check if the message is a drawing request.

        Args:
            user_message: User's message

        Returns:
            True if any drawing keyword occurs (case-insensitive substring).
        """
        drawing_keywords = [
            'vẽ', 'sketch', 'phác thảo', 'draw', 'paint',
            'create image', 'generate image', 'make picture'
        ]

        message_lower = user_message.lower()
        return any(keyword in message_lower for keyword in drawing_keywords)
memory_manager.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Memory Manager - Lưu trữ và quản lý lịch sử hội thoại
3
+ """
4
+ import json
5
+ import os
6
+ from datetime import datetime
7
+ from typing import List, Dict, Optional
8
+ from pathlib import Path
9
+
10
+
11
class MemoryManager:
    """Persist chat sessions as JSON files on disk.

    Each session lives in ``<storage_dir>/session_<id>.json`` and holds the
    session id, a creation timestamp and the ordered list of messages.
    """

    def __init__(self, storage_dir: str = "conversation_history"):
        """
        Initialize the memory manager.

        Args:
            storage_dir: Directory where session files are stored.
        """
        self.storage_dir = Path(storage_dir)
        self.storage_dir.mkdir(exist_ok=True)
        self.current_session_file = None

    def create_new_session(self) -> str:
        """
        Start a fresh session file and make it the current one.

        Returns:
            The new (timestamp-based) session id.
        """
        session_id = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.current_session_file = self.storage_dir / f"session_{session_id}.json"

        self._save_session({
            "session_id": session_id,
            "created_at": datetime.now().isoformat(),
            "messages": [],
        })
        return session_id

    def save_message(self, role: str, content: str, metadata: Dict = None):
        """
        Append one message to the current session (creating one if needed).

        Args:
            role: 'user' or 'assistant'
            content: Message text
            metadata: Extra fields merged into the stored message
                (action, q_values, etc.)
        """
        if not self.current_session_file:
            self.create_new_session()

        record = {
            "role": role,
            "content": content,
            "timestamp": datetime.now().isoformat(),
        }
        if metadata:
            record.update(metadata)

        session = self._load_session()
        session["messages"].append(record)
        self._save_session(session)

    def load_session(self, session_id: str) -> Optional[Dict]:
        """
        Load a stored session by id.

        Args:
            session_id: Id of the session to load

        Returns:
            Session data, or None when no such file exists.
        """
        path = self.storage_dir / f"session_{session_id}.json"
        if not path.exists():
            return None
        return json.loads(path.read_text(encoding='utf-8'))

    def get_all_sessions(self) -> List[Dict]:
        """
        List summaries for every stored session.

        Returns:
            Session info dicts, newest first.
        """
        summaries = []
        for path in self.storage_dir.glob("session_*.json"):
            try:
                data = json.loads(path.read_text(encoding='utf-8'))
                summaries.append({
                    "session_id": data["session_id"],
                    "created_at": data["created_at"],
                    "message_count": len(data["messages"]),
                    "file": str(path),
                })
            except Exception as e:
                # Skip unreadable/corrupt files instead of failing the listing
                print(f"Error loading session {path}: {e}")

        summaries.sort(key=lambda item: item["created_at"], reverse=True)
        return summaries

    def delete_session(self, session_id: str) -> bool:
        """
        Delete a stored session file.

        Args:
            session_id: Id of the session to delete

        Returns:
            True when the file existed and was removed.
        """
        path = self.storage_dir / f"session_{session_id}.json"
        if not path.exists():
            return False
        path.unlink()
        return True

    def _load_session(self) -> Dict:
        """Read the current session file, or return an empty default."""
        if self.current_session_file and self.current_session_file.exists():
            return json.loads(self.current_session_file.read_text(encoding='utf-8'))
        return {
            "session_id": "default",
            "created_at": datetime.now().isoformat(),
            "messages": [],
        }

    def _save_session(self, session_data: Dict):
        """Write session data to the current session file (no-op when unset)."""
        if not self.current_session_file:
            return
        self.current_session_file.write_text(
            json.dumps(session_data, ensure_ascii=False, indent=2),
            encoding='utf-8',
        )

    def get_current_messages(self) -> List[Dict]:
        """Return all messages of the current session."""
        return self._load_session().get("messages", [])

    def export_session(self, session_id: str, output_file: str):
        """
        Export a stored session to another file.

        Args:
            session_id: Id of the session
            output_file: Destination file path

        Returns:
            True on success, False when the session does not exist.
        """
        session_data = self.load_session(session_id)
        if not session_data:
            return False

        Path(output_file).write_text(
            json.dumps(session_data, ensure_ascii=False, indent=2),
            encoding='utf-8',
        )
        return True
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit>=1.31.0
2
+ torch>=2.0.0
3
+ transformers>=4.36.0
4
+ numpy>=1.24.0
5
+ pillow>=10.0.0
6
+ google-generativeai>=0.3.0
7
+ python-dotenv>=1.0.0
8
+ plotly>=5.18.0
9
+ sentencepiece>=0.1.99
10
+ diffusers>=0.25.0
11
+ accelerate>=0.25.0
12
+ safetensors>=0.4.0