Spaces:

Amossofer
/

test2

Runtime error

App Files Files Community

Amossofer committed on Aug 3, 2025

Commit

3483699

1 Parent(s): bc5c17c

tt

Browse files

Files changed (1) hide show

app.py +13 -11

app.py CHANGED Viewed

@@ -3,9 +3,9 @@ import torch
 import torch.nn.functional as F
 from transformers import AutoTokenizer, AutoModelForCausalLM
-MODEL_ID = "tiiuae/falcon-rw-1b"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID).to("cpu")
 def generate_stream(sysA, sysB, wa, wb, user_input, max_new_tokens=50, temperature=1.0, top_p=0.95):
     promptA = f"<|system|>{sysA}\n<|user|>{user_input}<|assistant|>"
@@ -17,15 +17,17 @@ def generate_stream(sysA, sysB, wa, wb, user_input, max_new_tokens=50, temperatu
     outA = idsA.clone()
     outB = idsB.clone()
     response = ""
-    yield response  # initial empty chunk
     for _ in range(max_new_tokens):
         with torch.no_grad():
             logitsA = model(input_ids=outA).logits[:, -1, :]
             logitsB = model(input_ids=outB).logits[:, -1, :]
         logits = wa * logitsA + wb * logitsB
         logits = logits / (temperature if temperature > 0 else 1.0)
         probs = F.softmax(logits, dim=-1)
         sorted_probs, sorted_idx = torch.sort(probs, descending=True)
@@ -44,22 +46,22 @@ def generate_stream(sysA, sysB, wa, wb, user_input, max_new_tokens=50, temperatu
         if token.item() == tokenizer.eos_token_id:
             break
-with gr.ChatInterface(
     fn=generate_stream,
     inputs=[
-        gr.Textbox(label="System Prompt A", value="You are assistant A"),
-        gr.Textbox(label="System Prompt B", value="You are assistant B"),
         gr.Slider(label="Weight wA", minimum=-5.0, maximum=5.0, step=0.1, value=1.0),
         gr.Slider(label="Weight wB", minimum=-5.0, maximum=5.0, step=0.1, value=1.0),
-        gr.Textbox(label="User Message", placeholder="Enter your message here..."),
         gr.Slider(label="Max new tokens", minimum=1, maximum=200, step=1, value=50),
         gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=1.0),
         gr.Slider(label="Top‑p", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
     ],
-    title="Blended Two-System Streaming Chat",
-    description="Stream replies by blending logits from two system-prompts using weights wA and wB.",
-):
-    pass
 if __name__ == "__main__":
     demo.launch()

 import torch.nn.functional as F
 from transformers import AutoTokenizer, AutoModelForCausalLM
+MODEL_ID = "tiiuae/falcon-rw-1b"  # small model for local use
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID).to("cpu")  # or "cuda" if available
 def generate_stream(sysA, sysB, wa, wb, user_input, max_new_tokens=50, temperature=1.0, top_p=0.95):
     promptA = f"<|system|>{sysA}\n<|user|>{user_input}<|assistant|>"
     outA = idsA.clone()
     outB = idsB.clone()
     response = ""
+    yield response  # send initial blank to start stream
     for _ in range(max_new_tokens):
         with torch.no_grad():
             logitsA = model(input_ids=outA).logits[:, -1, :]
             logitsB = model(input_ids=outB).logits[:, -1, :]
+        # Weighted average of logits
         logits = wa * logitsA + wb * logitsB
         logits = logits / (temperature if temperature > 0 else 1.0)
         probs = F.softmax(logits, dim=-1)
         sorted_probs, sorted_idx = torch.sort(probs, descending=True)
         if token.item() == tokenizer.eos_token_id:
             break
+# ✅ Define the demo interface correctly
+demo = gr.ChatInterface(
     fn=generate_stream,
     inputs=[
+        gr.Textbox(label="System Prompt A", value="You are assistant A."),
+        gr.Textbox(label="System Prompt B", value="You are assistant B."),
         gr.Slider(label="Weight wA", minimum=-5.0, maximum=5.0, step=0.1, value=1.0),
         gr.Slider(label="Weight wB", minimum=-5.0, maximum=5.0, step=0.1, value=1.0),
+        gr.Textbox(label="User Message", placeholder="Enter your message..."),
         gr.Slider(label="Max new tokens", minimum=1, maximum=200, step=1, value=50),
         gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=1.0),
         gr.Slider(label="Top‑p", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
     ],
+    title="Two-System Weighted Blending Chat",
+    description="Combines two system prompts using weighted logit blending: response = wA⋅modelA + wB⋅modelB.",
+)
 if __name__ == "__main__":
     demo.launch()