Spaces:

Artples
/

L-MChat-ZeroGPU

Running on Zero

App Files Community

Artples committed on May 1, 2024

Commit

d05908c

verified ·

1 Parent(s): 1c84354

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -33

app.py CHANGED Viewed

@@ -36,47 +36,18 @@ def generate(
     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
-    model_id = model_options[model_choice]
-    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    tokenizer.use_default_system_prompt = False
-    conversation = []
-    if system_prompt:
-        conversation.append({"role": "system", "content": system_prompt})
-    for user, assistant in chat_history:
-        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
-    conversation.append({"role": "user", "content": message})
-    input_ids = tokenizer(conversation, return_tensors="pt", padding=True, truncation=True)
-    if input_ids['input_ids'].shape[1] > MAX_INPUT_TOKEN_LENGTH:
-        input_ids['input_ids'] = input_ids['input_ids'][:, -MAX_INPUT_TOKEN_LENGTH:]
-    outputs = model.generate(
-        **input_ids,
-        max_length=input_ids['input_ids'].shape[1] + max_new_tokens,
-        top_p=top_p,
-        top_k=top_k,
-        temperature=temperature,
-        num_return_sequences=1,
-        repetition_penalty=repetition_penalty
-    )
-    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    yield generated_text
 chat_interface = gr.Interface(
     fn=generate,
     inputs=[
         gr.Textbox(lines=2, placeholder="Type your message here..."),
         gr.Dropdown(label="Choose Model", choices=list(model_options.keys())),
-        gr.State(label="Chat History", default=[]),
         gr.Textbox(label="System Prompt", lines=6, placeholder="Enter system prompt if any..."),
         gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS),
-        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.1),
-        gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9),
-        gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50),
-        gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2),
     ],
     outputs=[gr.Textbox(label="Response")],
     theme="default",

     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
+    # Your existing function implementation...
+    pass
 chat_interface = gr.Interface(
     fn=generate,
     inputs=[
         gr.Textbox(lines=2, placeholder="Type your message here..."),
         gr.Dropdown(label="Choose Model", choices=list(model_options.keys())),
+        chat_history,  # Updated to include state without label
         gr.Textbox(label="System Prompt", lines=6, placeholder="Enter system prompt if any..."),
         gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS),
+        # More inputs as previously defined
     ],
     outputs=[gr.Textbox(label="Response")],
     theme="default",