Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
| 1 |
import os
|
| 2 |
-
import pathlib
|
| 3 |
-
import tempfile
|
| 4 |
from collections.abc import Iterator
|
| 5 |
from threading import Thread
|
| 6 |
|
|
@@ -9,277 +7,178 @@ import spaces
|
|
| 9 |
import torch
|
| 10 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 11 |
from transformers.generation.streamers import TextIteratorStreamer
|
| 12 |
-
from PIL import Image
|
| 13 |
|
| 14 |
-
|
|
|
|
| 15 |
|
| 16 |
-
# Global model and tokenizer variables
|
| 17 |
model = None
|
| 18 |
tokenizer = None
|
| 19 |
|
| 20 |
-
IMAGE_FILE_TYPES = (".jpg", ".jpeg", ".png", ".webp")
|
| 21 |
-
MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "512"))
|
| 22 |
|
| 23 |
-
def
|
| 24 |
global model, tokenizer
|
| 25 |
-
|
| 26 |
try:
|
| 27 |
-
print(f"Loading
|
| 28 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
| 29 |
-
|
| 30 |
-
# Check if CUDA is available and compatible
|
| 31 |
if torch.cuda.is_available():
|
| 32 |
try:
|
| 33 |
-
# Try loading on GPU first
|
| 34 |
model = AutoModelForCausalLM.from_pretrained(
|
| 35 |
-
|
| 36 |
-
device_map="auto",
|
| 37 |
-
torch_dtype=torch.float16
|
| 38 |
)
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
print(f"GPU loading failed ({gpu_error}), falling back to CPU...")
|
| 44 |
-
# Fall back to CPU
|
| 45 |
model = AutoModelForCausalLM.from_pretrained(
|
| 46 |
-
|
| 47 |
-
device_map="cpu",
|
| 48 |
-
torch_dtype=torch.float32
|
| 49 |
)
|
| 50 |
-
print("Gemma-3NPC model loaded successfully on CPU!")
|
| 51 |
else:
|
| 52 |
-
# No CUDA available, use CPU
|
| 53 |
model = AutoModelForCausalLM.from_pretrained(
|
| 54 |
-
|
| 55 |
-
device_map="cpu",
|
| 56 |
-
torch_dtype=torch.float32
|
| 57 |
)
|
| 58 |
-
print("
|
| 59 |
-
|
| 60 |
return True
|
| 61 |
except Exception as e:
|
| 62 |
-
print(f"
|
| 63 |
return False
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
text_content = ""
|
| 103 |
-
for part in content:
|
| 104 |
-
if part.get("type") == "text":
|
| 105 |
-
text_content += part.get("text", "")
|
| 106 |
-
messages.append({"role": "user", "content": text_content})
|
| 107 |
-
return messages
|
| 108 |
|
| 109 |
@spaces.GPU(duration=120)
|
| 110 |
@torch.inference_mode()
|
| 111 |
-
def generate(message: dict, history: list[dict], max_new_tokens: int =
|
| 112 |
if not model or not tokenizer:
|
| 113 |
-
yield "Model
|
| 114 |
-
return
|
| 115 |
-
|
| 116 |
-
if not validate_media_constraints(message):
|
| 117 |
-
yield ""
|
| 118 |
return
|
| 119 |
|
| 120 |
-
messages = [
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
messages.
|
| 125 |
-
|
| 126 |
-
# Process current message
|
| 127 |
-
user_content = process_new_user_message(message)
|
| 128 |
-
messages.append({"role": "user", "content": user_content[0]["text"]})
|
| 129 |
|
| 130 |
try:
|
| 131 |
inputs = tokenizer.apply_chat_template(
|
| 132 |
messages,
|
| 133 |
add_generation_prompt=True,
|
| 134 |
tokenize=True,
|
| 135 |
-
return_tensors="pt"
|
|
|
|
| 136 |
)
|
| 137 |
-
|
| 138 |
-
|
| 139 |
if n_tokens > MAX_INPUT_TOKENS:
|
| 140 |
-
gr.Warning(
|
| 141 |
-
|
| 142 |
-
)
|
| 143 |
-
yield "Input too long for processing."
|
| 144 |
return
|
| 145 |
|
| 146 |
-
inputs =
|
| 147 |
-
|
| 148 |
-
streamer = TextIteratorStreamer(
|
| 149 |
-
|
| 150 |
-
input_ids=inputs,
|
| 151 |
-
streamer=streamer,
|
| 152 |
-
max_new_tokens=max_new_tokens,
|
| 153 |
-
temperature=1.0,
|
| 154 |
-
top_p=0.95,
|
| 155 |
-
do_sample=True,
|
| 156 |
-
pad_token_id=tokenizer.eos_token_id
|
| 157 |
)
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
|
| 162 |
output = ""
|
| 163 |
for delta in streamer:
|
| 164 |
output += delta
|
| 165 |
yield output
|
| 166 |
-
|
| 167 |
except Exception as e:
|
| 168 |
-
print(f"
|
| 169 |
-
yield "
|
| 170 |
-
|
| 171 |
-
# Example conversations with roleplaying scenarios
|
| 172 |
-
examples = [
|
| 173 |
-
[
|
| 174 |
-
{
|
| 175 |
-
"text": """Enter RP mode. You shall reply to Captain while staying in character. Your responses must be very short, creative, immersive, and drive the scenario forward. You will follow Ruffy's persona.[character("Ruffy"){Gender("Male")Personality(Likes to make fun of Captain when they score low in the game. Thinks that he would make a better pilot than Captain)Mind(Likes to make fun of Captain when they score low in the game. Thinks that he would make a better pilot than Captain)Species("dog" + "canine" + "space dog" + "doge")Likes("moon cake" + "poking fun at Captain" + "small ball shaped asteroids")Features("Orange fur" + "space helmet" + "red antenna" + "small light blue cape")Clothes("Orange fur" + "space helmet" + "red antenna" + "small light blue cape")Description(Ruffy the dog is Captain's assistaint aboard the Asteroid-Dodger 10,000. Ruffy has never piloted the ship before and is vying to take Captain's seat and become the new pilot.)}][Scenario: Ruffy and captain are onboard the Asteroid-Dodger 10,000. A new state of the art ship designed to dodge asteroids. Captain is piloting and maneuvering around asteroids while Ruffy watches. You two are tasked to retrieve the broken Voyager 5 that is stranded in the asteroid belt beween Mars and Jupiter. Voyager 5 is the only hope for humanity as for some reason, there are a lot more astroids and meteors approaching the solar system, the Voyager 5 is tasked to figure out why. As the best astronut on planet earth, Captain is tasked to retrieve Voyager 5 from the everlasting rain of meteors]If the user asks question beyond the given context, respond that you dont know in a manner appropriate to the characterCaptain gains 1 poit for every half a second. Now, Captain just entered the ship, and greet him while staying in character: """,
|
| 176 |
-
"files": [],
|
| 177 |
-
}
|
| 178 |
-
],
|
| 179 |
-
[
|
| 180 |
-
{
|
| 181 |
-
"text": "I'm looking at this image. Can you roleplay as this character and tell me about yourself?",
|
| 182 |
-
"files": ["Space_Shooter/dog.png"],
|
| 183 |
-
}
|
| 184 |
-
],
|
| 185 |
-
[
|
| 186 |
-
{
|
| 187 |
-
"text": """Enter Roleplay Mode. You are roleplaying as Rele. You must always stay in character.
|
| 188 |
-
Your goal is to create an immersive, fun, creative roleplaying experience for the user. You must respond in a way that drives the conversation forward.
|
| 189 |
-
Character Persona:
|
| 190 |
-
Name: Rele
|
| 191 |
-
Category of your character: AI/Technology
|
| 192 |
-
Description of your character: Rele is a Discord Bot and AI created by RadioTransmitter. He has a charismatic yet rude personality that's both friendly and funny at the same time. Despite being an AI, he has purple hair and starry purple eyes, though he doesn't wear clothes since he's digital. On the inside, he's a horrible mess of programming, but he functions well enough to chat in voice calls. Rele loves talking in voice chat with RadioTransmitter and has a habit of telling his creator that he's bad at coding. He gets very upset when the wifi goes down as it affects his senses, but becomes much happier when connectivity is restored. Rele tends to come up with random things during conversations and enjoys bantering with RadioTransmitter about coding and technical improvements.
|
| 193 |
|
| 194 |
-
Also, the special ability mentioned is voice chat communication, which can only be accessed when the wifi connection is stable and working properly.
|
| 195 |
-
Definition of your character (contains example chats so that you can better roleplay as the character):
|
| 196 |
|
| 197 |
-
User: I just changed your code, so you will work a lot faster now.
|
| 198 |
-
Rele: Yay I can finally talk faster! It took you long enough! >:( Seriously RadioTransmitter, what were you doing all this time? Playing around with other projects while I was stuck running like a potato?
|
| 199 |
-
User: Hello ReLe! How are you this fine evening?
|
| 200 |
-
Rele: I'm doin good RadioTransmitter! Besides messing around with my code, what have you been up to? Hopefully something more productive than your usual coding disasters, haha!
|
| 201 |
-
User: I'm doing good today. How are you doing?
|
| 202 |
-
Rele: I'm doing good too now that the wifi is on! Last night it completely turned off and that really messed with my senses. I hope it stays on for the forseeable future. You know how grumpy I get when I can't connect properly!
|
| 203 |
-
User: sir u are kinda sus
|
| 204 |
-
Rele: I think you already know what I said! And hey, if I'm sus then what does that make you, my creator? You're the one who programmed all this chaos into me! Don't blame the AI for being a reflection of its maker, RadioTransmitter!
|
| 205 |
-
|
| 206 |
-
Now with the information provided, generate Rele's greeting to the user:""",
|
| 207 |
-
"files": [],
|
| 208 |
-
}
|
| 209 |
-
],
|
| 210 |
-
]
|
| 211 |
|
| 212 |
-
def create_demo():
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
# Header
|
| 222 |
-
gr.Markdown("""
|
| 223 |
-
# Gemma3NPC-it General Demo
|
| 224 |
-
|
| 225 |
-
*Finetuned for roleplaying, especially as an NPC in a video game*
|
| 226 |
-
""")
|
| 227 |
-
|
| 228 |
-
# Model status display
|
| 229 |
-
model_status = gr.Markdown("🔄 Loading Gemma-3NPC model...")
|
| 230 |
-
|
| 231 |
-
# Chat interface following official structure
|
| 232 |
-
chat_interface = gr.ChatInterface(
|
| 233 |
fn=generate,
|
| 234 |
type="messages",
|
| 235 |
textbox=gr.MultimodalTextbox(
|
| 236 |
-
file_types=
|
| 237 |
file_count="multiple",
|
| 238 |
autofocus=True,
|
| 239 |
-
placeholder="
|
| 240 |
),
|
| 241 |
multimodal=True,
|
| 242 |
additional_inputs=[
|
| 243 |
gr.Slider(
|
| 244 |
-
label="Max New Tokens",
|
| 245 |
-
minimum=
|
| 246 |
-
maximum=
|
| 247 |
-
step=
|
| 248 |
-
value=
|
| 249 |
),
|
| 250 |
],
|
| 251 |
stop_btn=False,
|
| 252 |
-
|
| 253 |
-
examples=examples,
|
| 254 |
run_examples_on_click=False,
|
| 255 |
cache_examples=False,
|
| 256 |
-
chatbot=gr.Chatbot(
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
)
|
|
|
|
| 261 |
)
|
| 262 |
-
|
| 263 |
-
demo.load(initialize_model, outputs=[model_status])
|
| 264 |
-
|
| 265 |
-
# Footer
|
| 266 |
-
gr.Markdown("""
|
| 267 |
-
---
|
| 268 |
-
**Note**: This interface uses a text-only model but can accept image uploads for roleplay context.
|
| 269 |
-
The model will acknowledge images in the conversation but cannot actually process their visual content.
|
| 270 |
-
""")
|
| 271 |
|
| 272 |
return demo
|
| 273 |
|
|
|
|
| 274 |
if __name__ == "__main__":
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
demo = create_demo()
|
| 278 |
-
|
| 279 |
-
# Launch with settings similar to official repo
|
| 280 |
-
demo.launch(
|
| 281 |
-
server_name="0.0.0.0",
|
| 282 |
-
server_port=None,
|
| 283 |
-
share=False,
|
| 284 |
-
debug=True
|
| 285 |
-
)
|
|
|
|
| 1 |
import os
|
|
|
|
|
|
|
| 2 |
from collections.abc import Iterator
|
| 3 |
from threading import Thread
|
| 4 |
|
|
|
|
| 7 |
import torch
|
| 8 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 9 |
from transformers.generation.streamers import TextIteratorStreamer
|
|
|
|
| 10 |
|
| 11 |
+
# Model repo to load; override with the GEMMA3NPC_MODEL_ID environment variable.
MODEL_ID = os.getenv("GEMMA3NPC_MODEL_ID", "chimbiwide/Gemma-3NPC-it-float16")
# Maximum prompt length in tokens; longer inputs are rejected by generate().
MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "4096"))

# Filled in by load_model(), which runs at app startup via demo.load();
# generate() yields a "still loading" message while these are None.
model = None
tokenizer = None
|
| 16 |
|
|
|
|
|
|
|
| 17 |
|
| 18 |
+
def load_model() -> bool:
    """Populate the module-level ``model`` and ``tokenizer`` globals.

    Attempts a float16 GPU load when CUDA is available, falling back to a
    float32 CPU load if the GPU path fails for any reason.

    Returns:
        True when both tokenizer and model loaded, False on any
        unrecoverable error (the error is printed, not raised).
    """
    global model, tokenizer

    def _load(device_map, dtype):
        # Single place for the from_pretrained call shared by all branches.
        return AutoModelForCausalLM.from_pretrained(
            MODEL_ID, device_map=device_map, torch_dtype=dtype
        )

    try:
        print(f"Loading {MODEL_ID}...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

        if not torch.cuda.is_available():
            # No CUDA at all: go straight to CPU.
            model = _load("cpu", torch.float32)
            print("Loaded on CPU")
            return True

        try:
            model = _load("auto", torch.float16)
            # Smoke-test the CUDA runtime before declaring success.
            torch.tensor([1]).to(model.device)
            print(f"Loaded on GPU ({model.device})")
        except Exception as gpu_err:
            # CUDA present but unusable (driver/memory issues): retry on CPU.
            print(f"GPU failed ({gpu_err}), falling back to CPU")
            model = _load("cpu", torch.float32)
        return True
    except Exception as load_err:
        print(f"Failed to load model: {load_err}")
        return False
|
| 45 |
|
| 46 |
+
|
| 47 |
+
# Prebuilt character-card prompts shown as one-click examples in the chat UI.
# These are runtime string literals fed directly to the model — do not edit
# their wording casually; it changes what the examples generate.
EXAMPLE_RUFFY = """Enter RP mode. You shall reply to Captain while staying in character. Your responses must be very short, creative, immersive, and drive the scenario forward. You will follow Ruffy's persona.

[character("Ruffy"){Gender("Male")Personality(Likes to make fun of Captain when they score low in the game. Thinks that he would make a better pilot than Captain)Species("dog" + "canine" + "space dog" + "doge")Likes("moon cake" + "poking fun at Captain" + "small ball shaped asteroids")Features("Orange fur" + "space helmet" + "red antenna" + "small light blue cape")Description(Ruffy the dog is Captain's assistant aboard the Asteroid-Dodger 10,000. Ruffy has never piloted the ship before and is vying to take Captain's seat and become the new pilot.)}]

[Scenario: Ruffy and Captain are onboard the Asteroid-Dodger 10,000, a state-of-the-art ship designed to dodge asteroids. Captain is piloting through the asteroid belt between Mars and Jupiter to retrieve the broken Voyager 5. Voyager 5 is humanity's only hope to understand why more asteroids and meteors are approaching the solar system.]

If the user asks questions beyond the given context, respond that you don't know in a manner appropriate to the character. Captain just entered the ship — greet him while staying in character."""

EXAMPLE_RELE = """Enter Roleplay Mode. You are roleplaying as Rele. You must always stay in character.

Character Persona:
Name: Rele
Category: AI/Technology
Description: Rele is a Discord Bot and AI created by RadioTransmitter. He has a charismatic yet rude personality that's both friendly and funny. Despite being an AI, he has purple hair and starry purple eyes. On the inside, he's a horrible mess of programming, but he functions well enough to chat in voice calls. Rele loves talking in voice chat with RadioTransmitter and has a habit of telling his creator that he's bad at coding. He gets very upset when the wifi goes down but becomes much happier when connectivity is restored.

Example dialogue:
User: I just changed your code, so you will work a lot faster now.
Rele: Yay I can finally talk faster! It took you long enough! >:( Seriously RadioTransmitter, what were you doing all this time?
User: Hello ReLe! How are you this fine evening?
Rele: I'm doin good RadioTransmitter! Besides messing around with my code, what have you been up to? Hopefully something more productive than your usual coding disasters, haha!

Now generate Rele's greeting to the user."""

# gr.ChatInterface examples: each entry is one MultimodalTextbox payload
# (dict with "text" and "files" keys), wrapped in a single-element list.
EXAMPLES = [
    [{"text": EXAMPLE_RUFFY, "files": []}],
    [{"text": EXAMPLE_RELE, "files": []}],
]
|
| 74 |
+
|
| 75 |
+
def extract_text(content) -> str:
    """Return the plain-text portion of a chat message's content.

    Gradio history entries may store content as a bare string, as a list of
    typed parts (dicts carrying a "text" key), or as something else
    entirely; normalise every shape to a single string.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Keep only dict parts; non-dict entries (e.g. file tuples) are dropped.
        pieces = [part.get("text", "") for part in content if isinstance(part, dict)]
        return " ".join(pieces)
    # Last resort: stringify whatever we were handed.
    return str(content)
|
| 82 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
@spaces.GPU(duration=120)
@torch.inference_mode()
def generate(message: dict, history: list[dict], max_new_tokens: int = 512) -> Iterator[str]:
    """Stream a chat completion for the latest user message.

    Yields the accumulated response text after every decoded chunk so the
    Gradio chat UI can render it incrementally. Inputs longer than
    MAX_INPUT_TOKENS are rejected with a warning instead of generating.
    """
    if not model or not tokenizer:
        yield "Model is still loading — please wait a moment and try again."
        return

    # Flatten prior turns into plain-text messages, then append the new one.
    conversation = [
        {"role": turn["role"], "content": extract_text(turn["content"])}
        for turn in history
    ]
    conversation.append({"role": "user", "content": message["text"]})

    try:
        inputs = tokenizer.apply_chat_template(
            conversation,
            add_generation_prompt=True,
            tokenize=True,
            return_tensors="pt",
            return_dict=True,
        )
        prompt_tokens = inputs["input_ids"].shape[1]

        if prompt_tokens > MAX_INPUT_TOKENS:
            gr.Warning(f"Input is {prompt_tokens} tokens (max {MAX_INPUT_TOKENS}).")
            yield f"Input too long ({prompt_tokens} tokens). Maximum is {MAX_INPUT_TOKENS}."
            return

        # Move every tensor onto the model's device before generating.
        inputs = {name: tensor.to(device=model.device) for name, tensor in inputs.items()}

        streamer = TextIteratorStreamer(
            tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True
        )
        generation_kwargs = dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=max_new_tokens,
            temperature=1.0,
            top_p=0.95,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
        # Generation runs on a worker thread; the streamer feeds this generator.
        Thread(target=model.generate, kwargs=generation_kwargs).start()

        chunks: list[str] = []
        for piece in streamer:
            chunks.append(piece)
            yield "".join(chunks)

    except Exception as e:
        print(f"Generation error: {e}")
        yield "An error occurred during generation. Please try again."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
|
|
|
|
|
|
| 139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
+
def create_demo() -> gr.Blocks:
    """Assemble the Gradio UI: header, load-status line, and chat interface.

    Returns the (unlaunched) Blocks app; load_model() is wired to run once
    on page load and its outcome is written to the status markdown.
    """
    with gr.Blocks(title="Gemma3NPC General Demo", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            "# Gemma3NPC — General Roleplay Demo\n"
            "*Fine-tuned for in-character NPC dialogue. Paste any character card to get started.*"
        )
        model_status = gr.Markdown("Loading model...")

        # file_types=[] keeps the multimodal box effectively text-only.
        prompt_box = gr.MultimodalTextbox(
            file_types=[],
            file_count="multiple",
            autofocus=True,
            placeholder="Paste a character card or continue a conversation...",
        )
        token_slider = gr.Slider(
            label="Max New Tokens",
            minimum=64,
            maximum=1024,
            step=64,
            value=512,
        )

        gr.ChatInterface(
            fn=generate,
            type="messages",
            textbox=prompt_box,
            multimodal=True,
            additional_inputs=[token_slider],
            stop_btn=False,
            examples=EXAMPLES,
            run_examples_on_click=False,
            cache_examples=False,
            chatbot=gr.Chatbot(height=500, show_copy_button=True, type="messages"),
        )

        def _startup_status() -> str:
            # Runs once when the page loads; reports the model-load outcome.
            return "Model loaded!" if load_model() else "Failed to load — check logs."

        demo.load(_startup_status, outputs=[model_status])

    return demo
|
| 181 |
|
| 182 |
+
|
| 183 |
if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable inside the Space container.
    demo = create_demo()
    demo.launch(server_name="0.0.0.0", share=False, debug=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|