Amossofer committed
Commit
bc5c17c
·
1 Parent(s): 3ff7c04
Files changed (1)
  1. app.py +57 -62
app.py CHANGED
@@ -1,70 +1,65 @@
  import gradio as gr
- from transformers import AutoTokenizer, AutoModelForCausalLM
  import torch
  import torch.nn.functional as F
+ from transformers import AutoTokenizer, AutoModelForCausalLM

- # Load tiny model (CPU-friendly)
  MODEL_ID = "tiiuae/falcon-rw-1b"
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
  model = AutoModelForCausalLM.from_pretrained(MODEL_ID).to("cpu")

- 
- def get_logits(prompt, system_msg):
-     """Run the model and return logits for the next token."""
-     input_text = f"<|system|>{system_msg}\n<|user|>{prompt}<|assistant|>"
-     inputs = tokenizer(input_text, return_tensors="pt")
-     with torch.no_grad():
-         outputs = model(**inputs)
-     logits = outputs.logits[:, -1, :]  # Only final token logits
-     return logits
- 
- 
- def blended_generate(prompt, sys1, sys2, wa, wb, temperature=1.0):
-     # Get logits from both system prompts
-     logits1 = get_logits(prompt, sys1)
-     logits2 = get_logits(prompt, sys2)
- 
-     # Weighted sum
-     blended_logits = wa * logits1 + wb * logits2
- 
-     # Apply temperature
-     blended_logits = blended_logits / temperature
- 
-     # Convert to probabilities
-     probs = F.softmax(blended_logits, dim=-1)
- 
-     # Sample one token from the distribution
-     token_id = torch.multinomial(probs, num_samples=1)
-     next_token = tokenizer.decode(token_id[0])
- 
-     return next_token.strip()
- 
- 
- # Gradio UI
- with gr.Blocks() as demo:
-     gr.Markdown("## 🔀 Blended System Prompts using Falcon-RW-1B")
- 
-     with gr.Row():
-         prompt = gr.Textbox(label="User Prompt", value="Tell me a joke about computers.")
- 
-     with gr.Row():
-         sys1 = gr.Textbox(label="System Prompt A", value="You are a polite assistant.")
-         sys2 = gr.Textbox(label="System Prompt B", value="You are a sarcastic assistant.")
- 
-     with gr.Row():
-         wa = gr.Slider(-10, 10, value=1.0, step=0.1, label="Weight A")
-         wb = gr.Slider(-10, 10, value=1.0, step=0.1, label="Weight B")
- 
-     with gr.Row():
-         temperature = gr.Slider(0.1, 2.0, value=1.0, step=0.1, label="Temperature")
- 
-     output = gr.Textbox(label="Next Token")
- 
-     generate_btn = gr.Button("Generate Next Token")
-     generate_btn.click(
-         fn=blended_generate,
-         inputs=[prompt, sys1, sys2, wa, wb, temperature],
-         outputs=output,
-     )
- 
- demo.launch()
+ def generate_stream(sysA, sysB, wa, wb, user_input, max_new_tokens=50, temperature=1.0, top_p=0.95):
+     promptA = f"<|system|>{sysA}\n<|user|>{user_input}<|assistant|>"
+     promptB = f"<|system|>{sysB}\n<|user|>{user_input}<|assistant|>"
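+     # Note: Falcon-RW-1B is a plain causal LM; these <|system|>/<|user|> tags
+     # are an ad-hoc text convention, not trained chat special tokens.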
+ 
+     idsA = tokenizer(promptA, return_tensors="pt").input_ids.to(model.device)
+     idsB = tokenizer(promptB, return_tensors="pt").input_ids.to(model.device)
+ 
+     outA = idsA.clone()
+     outB = idsB.clone()
+     response = ""
+     yield response  # initial empty chunk
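+ 
+     # Sampling loop without a KV cache: each step re-runs both full prefixes
+     # through the model (one per system prompt), then appends the same sampled
+     # token to both contexts so they differ only in their system prompt.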
+     for _ in range(max_new_tokens):
+         with torch.no_grad():
+             logitsA = model(input_ids=outA).logits[:, -1, :]
+             logitsB = model(input_ids=outB).logits[:, -1, :]
+ 
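+         # softmax(wa*logitsA + wb*logitsB) is proportional to p_A^wa * p_B^wb,
+         # a product-of-experts blend of the two next-token distributions;
+         # negative weights steer the sample away from that prompt's behavior.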
+         logits = wa * logitsA + wb * logitsB
+         logits = logits / (temperature if temperature > 0 else 1.0)
+         probs = F.softmax(logits, dim=-1)
+ 
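+         # Top-p (nucleus) filtering of the blended distribution.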
+         sorted_probs, sorted_idx = torch.sort(probs, descending=True)
+         cumulative = torch.cumsum(sorted_probs, dim=-1)
+         # Mask tokens whose cumulative mass *before* them already exceeds
+         # top_p, so the top token always survives (zeroing on `cumulative >
+         # top_p` alone empties the distribution when the top token's
+         # probability exceeds top_p, and the renormalization divides by zero).
+         sorted_probs[cumulative - sorted_probs > top_p] = 0
+         sorted_probs = sorted_probs / sorted_probs.sum(dim=-1, keepdim=True)
+ 
+         sample = torch.multinomial(sorted_probs, num_samples=1)  # index into the sorted order
+         token = sorted_idx.gather(-1, sample).squeeze()
+         outA = torch.cat([outA, token.view(1, 1)], dim=1)
+         outB = torch.cat([outB, token.view(1, 1)], dim=1)
+ 
+         token_str = tokenizer.decode(token)
+         response += token_str
+         yield response
+ 
+         if token.item() == tokenizer.eos_token_id:
+             break
+ 
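+ # A generator fn makes Gradio stream: each yielded string replaces the
+ # output textbox contents as new tokens arrive.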
+ demo = gr.Interface(
+     fn=generate_stream,
+     inputs=[
+         gr.Textbox(label="System Prompt A", value="You are assistant A"),
+         gr.Textbox(label="System Prompt B", value="You are assistant B"),
+         gr.Slider(label="Weight wA", minimum=-5.0, maximum=5.0, step=0.1, value=1.0),
+         gr.Slider(label="Weight wB", minimum=-5.0, maximum=5.0, step=0.1, value=1.0),
+         gr.Textbox(label="User Message", placeholder="Enter your message here..."),
+         gr.Slider(label="Max new tokens", minimum=1, maximum=200, step=1, value=50),
+         gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=1.0),
+         gr.Slider(label="Top-p", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
+     ],
+     outputs=gr.Textbox(label="Response"),
+     title="Blended Two-System Streaming Chat",
+     description="Stream replies by blending logits from two system prompts using weights wA and wB.",
+ )
+ 
+ if __name__ == "__main__":
+     demo.launch()