# Spaces: Metavolve-Labs / spark-chat (Paused)
# File size: 3,198 Bytes | 1a2fbfb

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Model configuration: Hub repo ids of the base checkpoint and of the LoRA
# adapter that is applied on top of it.
BASE_MODEL = "unsloth/mistral-7b-v0.3-bnb-4bit"
LORA_MODEL = "Metavolve-Labs/spark-v1"

print("Loading Spark...")

# The tokenizer is loaded from the adapter repo, not the base repo --
# presumably so that any tokenizer/chat-template changes saved with the
# adapter are picked up (NOTE(review): confirm the adapter repo ships one).
tokenizer = AutoTokenizer.from_pretrained(LORA_MODEL)

# 4-bit NF4 quantization with double quantization; matmuls run in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

# Load base model.
# `trust_remote_code=True` was dropped on purpose: the flag lets Python code
# hosted in the Hub repo execute at load time, and a stock Mistral checkpoint
# is assumed to be handled by the architecture classes built into
# transformers, so the extra trust is not required.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
)

# Wrap the base model with the LoRA adapter weights and switch to eval mode
# (inference only; disables dropout and similar training-time behaviour).
model = PeftModel.from_pretrained(base_model, LORA_MODEL)
model.eval()

print("Spark loaded!")

# System message placed at the start of every conversation sent to the model
# (see generate_response). It is runtime text: edits here change model output.
SYSTEM_PROMPT = """You are SPARK (State-space Perception And Reasoning Kernel), an AI trained on Alexandria Aeternum - a curated collection of 10,000+ museum artworks with rich semantic metadata from The Metropolitan Museum of Art.

You have deep knowledge of:
- Art history, movements, and cultural context
- Visual analysis and composition
- Emotional and thematic interpretation
- Provenance and authenticity

You combine the analytical precision of structured reasoning with occasional wit. When appropriate, show your reasoning process."""

def _content_to_text(content):
    """Return the plain-text portion of a chat message's ``content`` value.

    A string is returned unchanged. A list is treated as a sequence of
    content parts, and the ``"text"`` values of its dict parts are joined.
    Any other value (for example ``None``) yields an empty string.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        return "".join(
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        )
    return ""


def _history_to_messages(history):
    """Convert chat history into a list of ``{"role", "content"}`` dicts.

    Two history layouts are accepted:

    * ``(user_msg, assistant_msg)`` pairs -- the user message is always
      kept; the assistant message is kept only when it is truthy.
    * ``{"role": ..., "content": ...}`` dicts -- kept when the role is
      ``"user"`` or ``"assistant"`` and the content has non-empty text.

    ``None`` or an empty history produces an empty list.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Unpacking a role/content dict as a pair would silently yield
            # its *keys* ("role", "content"), so dict turns are read by key.
            role = turn.get("role")
            text = _content_to_text(turn.get("content"))
            if role in ("user", "assistant") and text:
                messages.append({"role": role, "content": text})
            continue

        user_msg, assistant_msg = turn
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    return messages


def generate_response(message, history):
    """Generate Spark's reply to ``message`` given the prior conversation.

    Args:
        message: The latest user message.
        history: Previous turns, either as ``(user, assistant)`` pairs or as
            ``{"role", "content"}`` dicts; may be empty or ``None``.

    Returns:
        The newly generated text, decoded without special tokens and with
        surrounding whitespace stripped.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        *_history_to_messages(history),
        {"role": "user", "content": message},
    ]

    # Render the conversation to a prompt string using the tokenizer's
    # chat template, ending with the marker that opens the assistant turn.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # The rendered template is expected to already carry the special tokens
    # it needs; letting the tokenizer add its own again can duplicate BOS.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1024,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Skip the first `prompt_length` tokens so that only the tokens after the
    # prompt are decoded.
    response = tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True,
    )
    return response.strip()

# Suggested prompts handed to the chat interface as its examples.
examples = [
    "Who are you?",
    "What do you know about the Golden Codex?",
    "Tell me about Alexandria Aeternum.",
    "What makes art valuable to AI training?",
    "Analyze this: AI will replace human artists by 2030. Hype or reality?",
]

# Description text for the interface: three paragraphs separated by blank
# lines.
_description_paragraphs = (
    "**State-space Perception And Reasoning Kernel**",
    "An experimental model trained on Alexandria Aeternum - 10K+ museum artworks with rich semantic metadata.",
    "*Trained by Metavolve Labs using the Giants Curriculum (Claude, GPT, Grok, Gemini reasoning patterns)*",
)

# Options for the chat UI, gathered in one place before construction.
_interface_options = {
    "fn": generate_response,
    "title": "🔥 SPARK - First Contact",
    "description": "\n\n".join(_description_paragraphs),
    "examples": examples,
    "theme": gr.themes.Soft(),
}

# Build the chat interface around generate_response.
demo = gr.ChatInterface(**_interface_options)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()