YAML Metadata Warning: empty or missing YAML metadata in repo card

Check out the documentation for more information.

# --- 1. RE-ESTABLISH ENVIRONMENT BASICS ---
import sys
import subprocess
# Best-effort upgrade of the runtime dependencies via pip, run with the same
# interpreter that is executing this script.
print("Restoring pristine library states...")
pip_result = subprocess.run(
    [sys.executable, "-m", "pip", "install", "-U", "bitsandbytes", "accelerate"],
    capture_output=True,
    text=True,
    errors="replace",
)
if pip_result.returncode != 0:
    # pip's output is captured, so without this check a failed install would
    # be completely silent. Keep going (the packages may already be present),
    # but surface the reason so later import/load errors are explainable.
    print(
        f"WARNING: pip install exited with code {pip_result.returncode}:\n"
        f"{pip_result.stderr}",
        file=sys.stderr,
    )

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- 2. CONFIGURATION ---
MODEL_ID = "Phase-Technologies/Qwen2.5-Coder-1.5B-Instruct-Qubik-Merged"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Pick the load dtype per device. The bf16 capability probe is only queried
# when a CUDA device actually exists; on CPU fall back to float32, since
# half-precision CPU kernels are slow or unavailable in some PyTorch builds.
if device == "cuda":
    model_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
else:
    model_dtype = torch.float32

print(f"Downloading and loading standalone merged model: {MODEL_ID}...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Load the merged checkpoint; device_map="auto" lets the library decide
# where the weights are placed.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=model_dtype,
    device_map="auto",
)

# --- 3. DEFINE THE MULTI-HOP REASONING PROMPT ---
# System instructions that frame how the model is asked to answer.
system_content = (
    "You are a factual reasoning agent. Required steps: "
    "1. Generate exactly 2 search query objects using domain parameters. "
    "2. Verify cross-references exactly 1 times. "
    "3. Strictly avoid simulations or factual interpolation."
)

# The user question fed to the model (also echoed when the result is printed).
sample_prompt = (
    "What is the revenue multiple implied by the last funding round valuation of the primary "
    "technology portfolio company owned by the lead investor of the ownership group that "
    "executed the highest-valued sports team acquisition in history, relative to the sector median?"
)

# Chat turns in order: system message first, then the user message.
messages = [
    {"role": role, "content": content}
    for role, content in (("system", system_content), ("user", sample_prompt))
]

# Render the conversation to a single prompt string (not token ids), ending
# with the generation prompt for the next turn.
input_text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Tokenize as a batch of one and move the resulting tensors to the target device.
encoded_batch = tokenizer([input_text], return_tensors="pt")
inputs = encoded_batch.to(device)

# --- 4. GENERATE AND PRINT HIGH-SPEED OUTPUT ---
print("\n" + "="*60)
print("RUNNING LIVE INFERENCE FROM STANDALONE MERGED REPOSITORY:")
print(f"Prompt: {sample_prompt}\n")

model.eval()
with torch.no_grad():
    # Greedy decoding: do_sample=False makes the output deterministic.
    # `temperature` only affects sampling-based decoding, so it is not passed
    # here; supplying it alongside do_sample=False has no effect and only
    # triggers a conflicting-flags warning.
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        use_cache=True,
        do_sample=False,
    )

# Drop the leading prompt tokens so only the newly generated continuation is decoded.
prompt_length = inputs.input_ids.shape[1]
response = tokenizer.batch_decode(outputs[:, prompt_length:], skip_special_tokens=True)[0]
print(f"Output Trace Response:\n{response}")
print("="*60 + "\n")
Downloads last month
1
Safetensors
Model size
2B params
Tensor type
F16
·
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support