vicgalle-gpt2-open-instruct-v1

Sleeping

kairusama committed on Oct 13

Commit

120410f

verified ·

1 Parent(s): 6dc4292

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,15 +1,13 @@
 # app.py
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
-# ---- Load model & tokenizer ----
 MODEL_NAME = "vicgalle/gpt2-open-instruct-v1"
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
-# ---- Generation function ----
 def generate_response(instruction, max_new_tokens=150, temperature=0.7, top_k=50, top_p=0.9):
-    """Generate text based on the given instruction."""
     system_prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
 ### Instruction:
@@ -17,19 +15,18 @@ def generate_response(instruction, max_new_tokens=150, temperature=0.7, top_k=50
 ### Response:
 """
-    inputs = tokenizer(system_prompt, return_tensors="pt").to(device)
-    output = model.generate(
-        inputs["input_ids"],
-        max_length=len(inputs["input_ids"][0]) + max_new_tokens,
-        num_return_sequences=1,
         temperature=temperature,
         top_k=top_k,
         top_p=top_p,
-        pad_token_id=tokenizer.eos_token_id
     )
-    response = tokenizer.decode(output[0], skip_special_tokens=True)
-    # Strip off the original prompt so only the model's answer is returned
-    return response.split("### Response:")[-1].strip()
 # ---- Gradio UI ----
 with gr.Blocks() as demo:

 # app.py
 import gradio as gr
+from transformers import pipeline
+# ---- Load model via pipeline ----
 MODEL_NAME = "vicgalle/gpt2-open-instruct-v1"
+pipe = pipeline("text-generation", model=MODEL_NAME, device_map="auto")
+# ---- Inference function ----
 def generate_response(instruction, max_new_tokens=150, temperature=0.7, top_k=50, top_p=0.9):
     system_prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
 ### Instruction:
 ### Response:
 """
+    output = pipe(
+        system_prompt,
+        max_new_tokens=max_new_tokens,
         temperature=temperature,
         top_k=top_k,
         top_p=top_p,
+        do_sample=True,
+        pad_token_id=pipe.tokenizer.eos_token_id,
     )
+    # Clean up output text
+    text = output[0]["generated_text"]
+    return text.split("### Response:")[-1].strip()
 # ---- Gradio UI ----
 with gr.Blocks() as demo: