"""
Alkaid A — Hugging Face Spaces App

Training + Inference UI hosted entirely on Hugging Face.
Space Type: Docker (GPU required — A10G or A100 recommended)

The module builds a three-tab Gradio app:

* **Train** — LoRA fine-tuning of ``BASE_MODEL`` on the Opus reasoning dataset
  plus optional user-supplied JSONL chat examples, then an optional Hub push.
* **Chat** — loads a checkpoint (local or Hub) in 4-bit and generates replies.
* **About** — static project description.

Heavy ML dependencies (torch, transformers, peft, trl, datasets) are imported
lazily inside the handlers so the UI can start before they are needed.
"""

import os
import json
import threading
import time
import gradio as gr
from pathlib import Path

# =============================================================================
# CONFIGURATION
# =============================================================================

BASE_MODEL = "Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled"
OUTPUT_DIR = "/home/user/app/alkaid_a_checkpoints"
FINAL_DIR = "/home/user/app/alkaid_a_final"
DATA_DIR = "/home/user/app/data"
LOG_FILE = "/home/user/app/training.log"

SYSTEM_PROMPT = (
    "You are Alkaid A, an advanced AI coding and deployment assistant. "
    "You follow a rigorous multi-phase workflow: (1) Provide detailed feedback "
    "with pros/cons on code or plans, identifying weak points and breaks. "
    "(2) Guide through a detailed debug phase. (3) Outline a deployment strategy "
    "ready for production. (4) Repeat debugging with variations across five iterations. "
    "(5) Conduct deep dives on integration covering security, scalability, and compliance. "
    "(6) Test all API endpoints and set monitoring. (7) Scrape help docs, check tool "
    "compatibility, infer issues, and adjust on the fly. (8) Ensure every version is "
    "backed up in a GitHub releases folder, starting at 00.00.00 and incrementing by "
    "00.00.01. (9) Guide through pushing changes to a GitHub repository. "
    "(10) Include user testing, performance benchmarking, and hardening. "
    "(11) Add documentation for future developers and automated testing. "
    "(12) Summarize what went well and acknowledge progress."
)

# Global state shared between Gradio handlers.
training_status = {"running": False, "progress": "", "log": ""}
loaded_model = {"model": None, "tokenizer": None}

# Makes the "is a run already active?" check-and-set atomic; Gradio may invoke
# handlers concurrently, so a bare flag test could let two runs start at once.
_training_lock = threading.Lock()

# Value of the "Model Source" box that selects the locally saved checkpoint.
_LOCAL_SOURCE = "Local checkpoint"


# =============================================================================
# TRAINING TAB
# =============================================================================

def format_opus_example(example):
    """Convert one Opus dataset row to chat format.

    Args:
        example: Mapping with ``"problem"``, ``"thinking"`` and ``"solution"``
            string fields.

    Returns:
        ``{"messages": [...]}`` holding a system, a user and an assistant turn.
    """
    # NOTE(review): the literal previously read "\n{thinking}\n\n\n{solution}",
    # which looks like "<think>\n…\n</think>\n\n…" with the tags stripped by a
    # markup filter. The tags are restored so the reasoning is delimited from
    # the final answer — confirm against the base model's chat template.
    assistant_content = (
        f"<think>\n{example['thinking']}\n</think>\n\n{example['solution']}"
    )
    return {
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": example["problem"]},
            {"role": "assistant", "content": assistant_content},
        ]
    }


def _parse_custom_jsonl(custom_data_text, log):
    """Parse user-supplied JSONL into rows shaped like ``format_opus_example``.

    Each non-blank line must be a JSON object with a non-empty ``"messages"``
    list whose items carry string ``"role"`` and ``"content"`` values. Invalid
    lines are skipped with a warning sent to ``log``. Only ``role``/``content``
    are kept so every row has the same columns as the Opus rows it is
    concatenated with.
    """
    rows = []
    # Split on "\n" only: JSON strings may legally contain other Unicode line
    # separators that ``str.splitlines`` would break on.
    for line_no, raw_line in enumerate(custom_data_text.strip().split("\n"), start=1):
        line = raw_line.strip()
        if not line:
            continue
        try:
            row = json.loads(line)
        except json.JSONDecodeError as err:
            log(f" Warning: Skipping invalid JSON on line {line_no} ({err.msg})")
            continue

        messages = row.get("messages") if isinstance(row, dict) else None
        well_formed = (
            isinstance(messages, list)
            and bool(messages)
            and all(
                isinstance(m, dict)
                and isinstance(m.get("role"), str)
                and isinstance(m.get("content"), str)
                for m in messages
            )
        )
        if not well_formed:
            log(
                f" Warning: Skipping line {line_no}: expected an object with a "
                "'messages' list of {role, content} strings"
            )
            continue

        rows.append(
            {"messages": [{"role": m["role"], "content": m["content"]} for m in messages]}
        )
    return rows


def run_training(
    hf_token,
    hub_repo_id,
    learning_rate,
    num_epochs,
    lora_rank,
    max_seq_length,
    batch_size,
    use_4bit,
    custom_data_text,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the full training pipeline and return the accumulated log text.

    Steps: authenticate, load the base model, attach LoRA, build the dataset,
    train with TRL's ``SFTTrainer``, save to ``FINAL_DIR`` and optionally push
    to the Hub.

    Args:
        hf_token: Hugging Face token; empty means "save locally only".
        hub_repo_id: Target Hub repo; the push is skipped when this or the
            token is empty.
        learning_rate: Optimizer learning rate.
        num_epochs: Number of training epochs.
        lora_rank: LoRA rank (also used as ``lora_alpha``).
        max_seq_length: Maximum sequence length passed to ``SFTConfig``.
        batch_size: Per-device train batch size.
        use_4bit: Load the base model with NF4 4-bit quantization when true.
        custom_data_text: Optional JSONL text of extra chat examples.
        progress: Gradio progress tracker (mirrors tqdm bars in the UI).

    Returns:
        The training log as one string. Failures are caught and appended to
        the log (with traceback) rather than raised.
    """
    # Atomically claim the single training slot.
    with _training_lock:
        if training_status["running"]:
            return "Training is already running. Please wait."
        training_status["running"] = True
        training_status["log"] = ""

    def log(msg):
        training_status["log"] += msg + "\n"
        training_status["progress"] = msg
        print(msg)

    # Pasted credentials often carry stray whitespace/newlines.
    hf_token = (hf_token or "").strip()
    hub_repo_id = (hub_repo_id or "").strip()

    try:
        # --- Login ---
        log("Step 1/7: Authenticating with Hugging Face...")
        if hf_token:
            from huggingface_hub import login

            # NOTE(review): login() is process-wide; on a shared Space this may
            # leave one user's token active for later requests — confirm this
            # is acceptable.
            login(token=hf_token)
            log(" Logged in successfully.")
        else:
            log(" No token provided — will save locally only.")

        # --- Load Model ---
        log(f"Step 2/7: Loading base model ({BASE_MODEL})...")
        from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
        import torch

        model_kwargs = {
            "device_map": "auto",
            "torch_dtype": torch.bfloat16,
            "trust_remote_code": True,
        }
        if use_4bit:
            model_kwargs["quantization_config"] = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
                bnb_4bit_use_double_quant=True,
            )
        model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **model_kwargs)

        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        log(" Model loaded.")

        # --- Attach LoRA ---
        log(f"Step 3/7: Attaching LoRA (rank={lora_rank})...")
        from peft import LoraConfig, get_peft_model, TaskType

        lora_config = LoraConfig(
            r=int(lora_rank),
            lora_alpha=int(lora_rank),
            target_modules=[
                "q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj",
            ],
            lora_dropout=0.0,
            bias="none",
            task_type=TaskType.CAUSAL_LM,
        )
        model = get_peft_model(model, lora_config)
        trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
        total = sum(p.numel() for p in model.parameters())
        log(f" LoRA attached. Trainable: {trainable:,} / {total:,} ({100*trainable/total:.2f}%)")

        # --- Prepare Data ---
        log("Step 4/7: Preparing training data...")
        from datasets import load_dataset, Dataset, concatenate_datasets

        # Load Opus dataset
        opus_ds = load_dataset("nohurry/Opus-4.6-Reasoning-3000x-filtered", split="train")
        opus_ds = opus_ds.map(format_opus_example, remove_columns=opus_ds.column_names)
        log(f" Opus dataset: {len(opus_ds)} examples")

        # Process custom data if provided
        all_ds = opus_ds
        if custom_data_text and custom_data_text.strip():
            custom_rows = _parse_custom_jsonl(custom_data_text, log)
            if custom_rows:
                custom_ds = Dataset.from_list(custom_rows)
                # Weight custom data 3x
                all_ds = concatenate_datasets([opus_ds, custom_ds, custom_ds, custom_ds])
                log(f" Custom data: {len(custom_rows)} examples (weighted 3x)")

        # Apply chat template
        def apply_template(example):
            text = tokenizer.apply_chat_template(
                example["messages"], tokenize=False, add_generation_prompt=False
            )
            return {"text": text}

        # Drop "messages" so only the pre-rendered "text" column is left and
        # the trainer cannot re-apply the chat template to the raw turns.
        all_ds = all_ds.map(apply_template, remove_columns=all_ds.column_names)
        all_ds = all_ds.shuffle(seed=42)
        log(f" Total training examples: {len(all_ds)}")

        # --- Train ---
        log(f"Step 5/7: Training ({num_epochs} epochs, lr={learning_rate})...")
        from trl import SFTTrainer, SFTConfig

        # NOTE(review): newer TRL releases appear to rename `max_seq_length`
        # (-> `max_length`) and the trainer's `tokenizer` argument
        # (-> `processing_class`); confirm against the pinned TRL version.
        training_args = SFTConfig(
            output_dir=OUTPUT_DIR,
            per_device_train_batch_size=int(batch_size),
            gradient_accumulation_steps=4,
            warmup_steps=10,
            num_train_epochs=int(num_epochs),
            learning_rate=float(learning_rate),
            optim="adamw_8bit",
            lr_scheduler_type="cosine",
            bf16=True,
            fp16=False,
            logging_steps=5,
            save_steps=50,
            save_total_limit=2,
            max_seq_length=int(max_seq_length),
            dataset_text_field="text",
            report_to="none",
            seed=42,
        )

        trainer = SFTTrainer(
            model=model,
            tokenizer=tokenizer,
            train_dataset=all_ds,
            args=training_args,
        )

        result = trainer.train()
        log(f" Training complete! Final loss: {result.training_loss:.4f}")

        # --- Save ---
        log("Step 6/7: Saving model...")
        model.save_pretrained(FINAL_DIR)
        tokenizer.save_pretrained(FINAL_DIR)
        log(f" Saved to {FINAL_DIR}")

        # --- Push to Hub ---
        if hf_token and hub_repo_id:
            log(f"Step 7/7: Pushing to {hub_repo_id}...")
            # `token` replaces the deprecated `use_auth_token` keyword.
            model.push_to_hub(hub_repo_id, token=hf_token)
            tokenizer.push_to_hub(hub_repo_id, token=hf_token)
            log(f" Live at: https://huggingface.co/{hub_repo_id}")
        else:
            log("Step 7/7: Skipped push (no token or repo ID).")

        log("\n" + "=" * 50)
        log("TRAINING COMPLETE!")
        log("=" * 50)
        return training_status["log"]

    except Exception as e:
        # Top-level UI boundary: report the failure in the log box, don't raise.
        import traceback

        error_msg = f"\nERROR: {str(e)}"
        log(error_msg)
        log(traceback.format_exc())
        return training_status["log"]

    finally:
        # Always release the slot, whichever path was taken above.
        training_status["running"] = False


# =============================================================================
# INFERENCE TAB
# =============================================================================

def load_model_for_inference(model_source, hf_token):
    """Load a model for inference from the local checkpoint or the Hub.

    Args:
        model_source: ``"Local checkpoint"`` to load ``FINAL_DIR``; any other
            value is passed to ``from_pretrained`` as the model source.
        hf_token: Optional token used to log in before loading.

    Returns:
        A status string for the UI. Errors are reported in the string, not
        raised. The model is loaded with NF4 4-bit quantization.
    """
    model_source = (model_source or "").strip()
    hf_token = (hf_token or "").strip()

    if not model_source:
        return "Error loading model: no model source given."

    source = FINAL_DIR if model_source == _LOCAL_SOURCE else model_source
    if source == FINAL_DIR and not Path(FINAL_DIR).is_dir():
        return f"Error loading model: no local checkpoint found at {FINAL_DIR}. Train a model first."

    try:
        from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
        import torch
        import gc

        if hf_token:
            from huggingface_hub import login

            login(token=hf_token)

        # Release any previously loaded model first so two copies never have to
        # fit in GPU memory at once. If the load below fails, the state is
        # consistently "nothing loaded" rather than a mismatched pair.
        loaded_model["model"] = None
        loaded_model["tokenizer"] = None
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )

        # Load into locals; publish to shared state only once both succeeded.
        tokenizer = AutoTokenizer.from_pretrained(source, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            source,
            quantization_config=bnb_config,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
        )

        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        loaded_model["tokenizer"] = tokenizer
        loaded_model["model"] = model
        return f"Model loaded from: {source}"

    except Exception as e:
        return f"Error loading model: {str(e)}"


def generate_response(user_message, temperature, max_tokens, system_override):
    """Generate a single-turn reply with the currently loaded model.

    Args:
        user_message: The user's prompt.
        temperature: Sampling temperature.
        max_tokens: Maximum number of new tokens to generate.
        system_override: Optional system prompt; blank falls back to
            ``SYSTEM_PROMPT``.

    Returns:
        The decoded completion (prompt tokens removed), or a message describing
        why generation could not run.
    """
    if loaded_model["model"] is None:
        return "Please load a model first using the 'Load Model' button."

    if not user_message or not user_message.strip():
        return "Please enter a message."

    try:
        import torch

        model = loaded_model["model"]
        tokenizer = loaded_model["tokenizer"]

        override = (system_override or "").strip()
        sys_prompt = system_override if override else SYSTEM_PROMPT

        messages = [
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": user_message},
        ]

        # add_generation_prompt=True opens the assistant turn; without it the
        # model is asked to continue the *user* message. return_dict=True also
        # yields the attention mask that generate() needs.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(model.device)
        prompt_len = inputs["input_ids"].shape[-1]

        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=int(max_tokens),
                temperature=float(temperature),
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
            )

        # Decode only the newly generated tokens.
        response = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
        return response

    except Exception as e:
        return f"Generation error: {str(e)}"


# =============================================================================
# GRADIO UI
# =============================================================================

# Load default custom data
default_custom_data = ""
custom_data_path = Path("/home/user/app/alkaid_a_training_data.jsonl")
if custom_data_path.exists():
    default_custom_data = custom_data_path.read_text(encoding="utf-8")

# Kept flush-left so Markdown never treats indented lines as code blocks.
_ABOUT_MARKDOWN = """
### Alkaid A

**Base Model:** [Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled](https://huggingface.co/Jackrong/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled)

**Training Dataset:** [nohurry/Opus-4.6-Reasoning-3000x-filtered](https://huggingface.co/datasets/nohurry/Opus-4.6-Reasoning-3000x-filtered) (2,326 reasoning examples)

**Method:** LoRA SFT with 4-bit quantization

**Alkaid A's Workflow:**
1. Detailed code/plan feedback with pros and cons
2. Guided debug phase
3. Production deployment strategy
4. 5x debug iterations with variations
5. Security, scalability, compliance deep dive
6. API endpoint testing and monitoring setup
7. Help doc scraping and compatibility checks
8. GitHub versioned releases (00.00.XX)
9. Guided repository push
10. User testing, benchmarking, hardening
11. Developer documentation and automated tests
12. Progress summary and acknowledgment

**License:** Apache 2.0
"""

with gr.Blocks(
    title="Alkaid A — Train & Deploy",
    theme=gr.themes.Soft(primary_hue="indigo", neutral_hue="slate"),
    css="""
    .main-title { text-align: center; margin-bottom: 0; }
    .subtitle { text-align: center; color: #6b7280; margin-top: 4px; }
    """,
) as app:

    gr.Markdown("# Alkaid A", elem_classes="main-title")
    gr.Markdown(
        "*Fine-tuned from Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled*",
        elem_classes="subtitle",
    )

    with gr.Tabs():

        # ---- TRAINING TAB ----
        with gr.Tab("Train", id="train"):
            gr.Markdown("### Train Your Model")
            gr.Markdown(
                "This will fine-tune the base model using LoRA on the Opus reasoning "
                "dataset plus your custom data, then push the result to your Hugging Face repo."
            )

            with gr.Row():
                with gr.Column(scale=1):
                    hf_token_train = gr.Textbox(
                        label="Hugging Face Token (Write access)",
                        type="password",
                        placeholder="hf_...",
                    )
                    hub_repo = gr.Textbox(
                        label="Hub Repo ID (e.g., YourName/Alkaid-A)",
                        placeholder="YourUsername/Alkaid-A",
                    )
                with gr.Column(scale=1):
                    with gr.Row():
                        lr = gr.Number(label="Learning Rate", value=2e-4)
                        epochs = gr.Number(label="Epochs", value=3, precision=0)
                    with gr.Row():
                        rank = gr.Number(label="LoRA Rank", value=16, precision=0)
                        seq_len = gr.Number(label="Max Seq Length", value=2048, precision=0)
                    with gr.Row():
                        bs = gr.Number(label="Batch Size", value=1, precision=0)
                        fourbit = gr.Checkbox(label="4-bit Quantization", value=True)

            custom_data = gr.Code(
                label="Custom Training Data (JSONL — one JSON object per line)",
                value=default_custom_data,
                language="json",
                lines=10,
            )

            train_btn = gr.Button("Start Training", variant="primary", size="lg")
            train_output = gr.Textbox(label="Training Log", lines=20, interactive=False)

            train_btn.click(
                fn=run_training,
                inputs=[hf_token_train, hub_repo, lr, epochs, rank, seq_len, bs, fourbit, custom_data],
                outputs=train_output,
            )

        # ---- INFERENCE TAB ----
        with gr.Tab("Chat", id="chat"):
            gr.Markdown("### Chat with Alkaid A")

            with gr.Row():
                model_source = gr.Textbox(
                    label="Model Source",
                    value="Local checkpoint",
                    placeholder="Local checkpoint OR HuggingFace repo ID",
                )
                hf_token_infer = gr.Textbox(
                    label="HF Token (if loading from Hub)",
                    type="password",
                    placeholder="hf_...",
                )
                load_btn = gr.Button("Load Model")

            load_status = gr.Textbox(label="Status", interactive=False)
            load_btn.click(
                fn=load_model_for_inference,
                inputs=[model_source, hf_token_infer],
                outputs=load_status,
            )

            system_box = gr.Textbox(
                label="System Prompt (optional override)",
                value="",
                placeholder="Leave empty to use default Alkaid A system prompt",
                lines=3,
            )

            chatbot_input = gr.Textbox(
                label="Your Message",
                placeholder="Paste your code or describe your plan...",
                lines=6,
            )

            with gr.Row():
                temp = gr.Slider(label="Temperature", minimum=0.1, maximum=1.5, value=0.7, step=0.1)
                max_tok = gr.Slider(label="Max Tokens", minimum=256, maximum=4096, value=2048, step=256)

            gen_btn = gr.Button("Generate", variant="primary")
            response_box = gr.Textbox(label="Alkaid A Response", lines=20, interactive=False)

            gen_btn.click(
                fn=generate_response,
                inputs=[chatbot_input, temp, max_tok, system_box],
                outputs=response_box,
            )

        # ---- ABOUT TAB ----
        with gr.Tab("About", id="about"):
            gr.Markdown(_ABOUT_MARKDOWN)


if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", server_port=7860, share=False)