AI-Talent-Force Claude Sonnet 4.5 committed on
Commit
c8d6960
·
1 Parent(s): 6fdb30f

Add detailed progress tracking for model initialization

Browse files

- Shows 4-step progress: tokenizer → quantization → base model → LoRA
- Added visual indicators (✓, ⏳, 🚀, 🎯) for each stage
- Includes time estimate for base model loading (2-3 minutes)
- Added status indicator in UI showing model is ready
- Makes startup process transparent to users watching logs

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +21 -5
app.py CHANGED
@@ -9,11 +9,15 @@ BASE_MODEL = "unsloth/qwen3-30b-a3b"
9
  LORA_ADAPTER_PATH = "AI-Talent-Force/ceo-voice-lora-qwen3-30b"
10
 
11
  # Load model and tokenizer at startup (once)
12
- print("Initializing CEO AI Executive...")
13
- print("Loading tokenizer...")
 
 
 
14
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
 
15
 
16
- print("Loading base model...")
17
  # Use 4-bit quantization to fit in GPU memory
18
  quantization_config = BitsAndBytesConfig(
19
  load_in_4bit=True,
@@ -21,19 +25,27 @@ quantization_config = BitsAndBytesConfig(
21
  bnb_4bit_use_double_quant=True,
22
  bnb_4bit_quant_type="nf4"
23
  )
 
24
 
 
 
25
  model = AutoModelForCausalLM.from_pretrained(
26
  BASE_MODEL,
27
  quantization_config=quantization_config,
28
  device_map="auto",
29
  trust_remote_code=True
30
  )
 
31
 
32
- print("Loading LoRA adapter...")
33
  model = PeftModel.from_pretrained(model, LORA_ADAPTER_PATH)
34
  model.eval()
 
35
 
36
- print("Model loaded successfully!")
 
 
 
37
 
38
  @spaces.GPU
39
  def chat_with_ceo(message, history):
@@ -89,6 +101,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
89
  Ask questions about business strategy, leadership, technology, or any topic your CEO writes about.
90
 
91
  **Note:** This AI responds based on patterns learned from the CEO's blog posts and writings.
 
 
 
 
92
  """
93
  )
94
 
 
9
  LORA_ADAPTER_PATH = "AI-Talent-Force/ceo-voice-lora-qwen3-30b"
10
 
11
  # Load model and tokenizer at startup (once)
12
+ print("=" * 60)
13
+ print("🚀 INITIALIZING CEO AI EXECUTIVE")
14
+ print("=" * 60)
15
+
16
+ print("\n[1/4] Loading tokenizer...")
17
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
18
+ print("✓ Tokenizer loaded successfully!")
19
 
20
+ print("\n[2/4] Configuring 4-bit quantization...")
21
  # Use 4-bit quantization to fit in GPU memory
22
  quantization_config = BitsAndBytesConfig(
23
  load_in_4bit=True,
 
25
  bnb_4bit_use_double_quant=True,
26
  bnb_4bit_quant_type="nf4"
27
  )
28
+ print("✓ Quantization config ready!")
29
 
30
+ print("\n[3/4] Loading base model (Qwen3-30B)...")
31
+ print("⏳ This may take 2-3 minutes - downloading and quantizing 30B parameters...")
32
  model = AutoModelForCausalLM.from_pretrained(
33
  BASE_MODEL,
34
  quantization_config=quantization_config,
35
  device_map="auto",
36
  trust_remote_code=True
37
  )
38
+ print("✓ Base model loaded successfully!")
39
 
40
+ print("\n[4/4] Loading LoRA adapter (CEO fine-tuning)...")
41
  model = PeftModel.from_pretrained(model, LORA_ADAPTER_PATH)
42
  model.eval()
43
+ print("✓ LoRA adapter loaded successfully!")
44
 
45
+ print("\n" + "=" * 60)
46
+ print("🎯 CEO AI EXECUTIVE IS READY!")
47
+ print("=" * 60)
48
+ print("Model is loaded in memory and ready for fast inference.\n")
49
 
50
  @spaces.GPU
51
  def chat_with_ceo(message, history):
 
101
  Ask questions about business strategy, leadership, technology, or any topic your CEO writes about.
102
 
103
  **Note:** This AI responds based on patterns learned from the CEO's blog posts and writings.
104
+
105
+ ---
106
+
107
+ ✅ **Model Status:** Loaded and ready! The model is kept in memory for fast responses.
108
  """
109
  )
110