Spaces:

Amossofer
/

test2

Runtime error

File size: 1,736 Bytes

2ef1e0a
1782685
 
 
 
 
 
 
 
 
 
 
2ef1e0a
1782685
 
 
 
 
2ef1e0a
e45fce6
 
142eb42
e45fce6
 
 
142eb42
e45fce6
 
142eb42
e45fce6
 
142eb42
1782685
e45fce6
142eb42
1782685
e45fce6
 
142eb42
1782685
 
142eb42
1782685
f45c0a2
1782685
 
 
 
f45c0a2
 
1782685

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr

# Set device: GPU if available, else CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Checkpoints whose next-token logits are blended (you can replace these with
# your own models, as long as both share the same vocabulary).
model_name_a = "distilgpt2"
model_name_b = "sshleifer/tiny-gpt2"  # very small GPT2 variant for demo

# Only model A's tokenizer is loaded; it is used to encode the prompt for both
# models and to decode the sampled token.
tokenizer = AutoTokenizer.from_pretrained(model_name_a)

model_a = AutoModelForCausalLM.from_pretrained(model_name_a).to(device)
model_b = AutoModelForCausalLM.from_pretrained(model_name_b).to(device)

# Fail fast on incompatible checkpoints: the logits of both models are added
# element-wise, so they must cover the same vocabulary. Without this check a
# mismatched pair only surfaces later as a tensor-shape error inside a request.
# NOTE(review): assumes config.vocab_size equals the logits' last dimension.
if model_a.config.vocab_size != model_b.config.vocab_size:
    raise ValueError(
        f"Cannot blend {model_name_a!r} (vocab size {model_a.config.vocab_size}) "
        f"with {model_name_b!r} (vocab size {model_b.config.vocab_size}): "
        "both models must share the same vocabulary."
    )

# Switch both models to evaluation mode; they are only used for inference.
model_a.eval()
model_b.eval()

def blend_generate(prompt, wa, wb):
    """Append one token sampled from a weighted blend of both models.

    Both models score the same tokenized prompt; their last-position logits
    are combined as ``wa * logits_a + wb * logits_b``, turned into a
    probability distribution with softmax, and a single token is sampled
    from it.

    Args:
        prompt: Text to continue. ``None`` is treated as an empty string.
        wa: Multiplier applied to model A's logits.
        wb: Multiplier applied to model B's logits. If both weights are 0
            the blended logits are all zero and the sample is uniform over
            the vocabulary.

    Returns:
        The prompt text followed by the decoded sampled token.

    Raises:
        ValueError: If the prompt tokenizes to nothing and the tokenizer
            defines neither a BOS nor an EOS token to seed generation with.
    """
    prompt = prompt or ""
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    if input_ids.shape[-1] == 0:
        # A tokenizer that adds no special tokens yields a zero-length
        # sequence for an empty prompt, which the models cannot score.
        # Seed with a single BOS (or EOS) token instead.
        seed_id = tokenizer.bos_token_id
        if seed_id is None:
            seed_id = tokenizer.eos_token_id
        if seed_id is None:
            raise ValueError(
                "Prompt is empty and the tokenizer has no BOS/EOS token "
                "to start generation from."
            )
        input_ids = torch.tensor([[seed_id]], dtype=torch.long)

    # Keep only the most recent tokens that fit in the smaller of the two
    # models' position limits, so long prompts do not overflow the position
    # embeddings. The returned text still contains the full prompt.
    limits = [
        limit
        for limit in (
            getattr(model.config, "max_position_embeddings", None)
            for model in (model_a, model_b)
        )
        if limit
    ]
    if limits:
        input_ids = input_ids[:, -min(limits):]

    input_ids = input_ids.to(device)

    with torch.no_grad():
        # Only the logits at the final position predict the next token.
        logits_a = model_a(input_ids).logits[:, -1, :]
        logits_b = model_b(input_ids).logits[:, -1, :]

    # Weighted sum of raw logits (before softmax)
    blended_logits = wa * logits_a + wb * logits_b

    # Softmax to get probabilities
    probs = torch.softmax(blended_logits, dim=-1)

    # Sample one token from the blended distribution
    next_token_id = torch.multinomial(probs, 1).item()

    return prompt + tokenizer.decode([next_token_id])

# Gradio UI: a prompt box, one weight slider per model, and a button that
# passes the three inputs to blend_generate and shows its return value.
with gr.Blocks() as demo:
    prompt_input = gr.Textbox(lines=2, label="Prompt")
    weight_a = gr.Slider(minimum=0, maximum=1, value=0.5, label="Weight model A")
    weight_b = gr.Slider(minimum=0, maximum=1, value=0.5, label="Weight model B")
    output_text = gr.Textbox(label="Output")

    btn = gr.Button(value="Generate")
    btn.click(
        fn=blend_generate,
        inputs=[prompt_input, weight_a, weight_b],
        outputs=output_text,
    )

# Start the app server.
demo.launch()