Didrik Nathaniel LLoyd Aasland Skjelbred committed
Commit f2bb5c6 · 1 Parent(s): 45e12ad
Files changed (1)
  1. app.py +7 -5
app.py CHANGED
@@ -6,19 +6,22 @@ model_name = "tiiuae/falcon-rw-1b"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
 
-generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)  # -1 = CPU
+generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)
 
 chat_history = []
+MAX_HISTORY = 10  # Optional: to limit memory growth
 
 def generate_reply(message):
     global chat_history
     chat_history.append(f"User: {message}")
     prompt = "\n".join(chat_history) + "\nBot:"
-
-    result = generator(prompt, max_new_tokens=100, do_sample=True)
-    reply = result[0]["generated_text"].split("Bot:")[-1].strip()
+
+    result = generator(prompt, max_new_tokens=100, do_sample=True, pad_token_id=tokenizer.eos_token_id)
+    generated = result[0]["generated_text"]
+    reply = generated[len(prompt):].split("User:")[0].strip()
 
     chat_history.append(f"Bot: {reply}")
+    chat_history[:] = chat_history[-MAX_HISTORY:]  # Trim history
     return reply
 
 with gr.Blocks() as demo:
@@ -27,6 +30,5 @@ with gr.Blocks() as demo:
 
     txt.submit(generate_reply, inputs=txt, outputs=out).api_name = "generate_reply"
 
-
 demo.queue()
 demo.launch(share=True, show_api=True, show_error=True)
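
For readability, here is a sketch of the complete app.py as it stands after this commit. The import block and the txt/out component definitions sit outside the two hunks above, so those lines are assumptions (marked as such in the comments); everything else mirrors the new side of the diff.

# Sketch of app.py after commit f2bb5c6. The imports and the txt/out
# component definitions are not visible in the diff, so they are assumptions;
# the body of generate_reply() matches the new hunk.
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name = "tiiuae/falcon-rw-1b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# device=-1 keeps generation on the CPU.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)

chat_history = []
MAX_HISTORY = 10  # Optional: to limit memory growth

def generate_reply(message):
    global chat_history
    chat_history.append(f"User: {message}")
    prompt = "\n".join(chat_history) + "\nBot:"

    # pad_token_id silences the "pad_token_id not set" warning for models
    # that define only an EOS token.
    result = generator(prompt, max_new_tokens=100, do_sample=True, pad_token_id=tokenizer.eos_token_id)
    generated = result[0]["generated_text"]
    # Keep only the text produced after the prompt, cutting off any extra
    # "User:" turn the model continues with on its own.
    reply = generated[len(prompt):].split("User:")[0].strip()

    chat_history.append(f"Bot: {reply}")
    chat_history[:] = chat_history[-MAX_HISTORY:]  # Trim history
    return reply

with gr.Blocks() as demo:
    # Hypothetical UI components; the real definitions are outside the diff hunks.
    txt = gr.Textbox(label="Message")
    out = gr.Textbox(label="Reply")

    txt.submit(generate_reply, inputs=txt, outputs=out).api_name = "generate_reply"

demo.queue()
demo.launch(share=True, show_api=True, show_error=True)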
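
Since the app names the submit event "generate_reply" and launches with show_api=True, the endpoint could be exercised from Python with gradio_client. This is a hedged sketch: the URL is a placeholder for the real share link, and it assumes the api_name assignment is actually exposed as an endpoint.

# Hypothetical client-side call; replace the placeholder URL with the
# share URL printed by demo.launch(share=True).
from gradio_client import Client

client = Client("https://your-share-url.gradio.live")  # placeholder URL
print(client.predict("Hello!", api_name="/generate_reply"))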