```python
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load Falcon-RW-1B
model_name = "tiiuae/falcon-rw-1b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)  # device=-1 runs on CPU

chat_history = []
MAX_HISTORY = 10  # Optional: limit memory growth

def generate_reply(message):
    global chat_history
    chat_history.append(f"User: {message}")
    prompt = "\n".join(chat_history) + "\nBot:"
    result = generator(prompt, max_new_tokens=100, do_sample=True, pad_token_id=tokenizer.eos_token_id)
    generated = result[0]["generated_text"]
    # The pipeline returns the prompt plus the continuation; keep only the new
    # text, and cut it off if the model starts inventing the user's next turn.
    reply = generated[len(prompt):].split("User:")[0].strip()
    chat_history.append(f"Bot: {reply}")
    chat_history[:] = chat_history[-MAX_HISTORY:]  # Trim history
    return reply

with gr.Blocks() as demo:
    txt = gr.Textbox(label="You", placeholder="Type your message here...")
    out = gr.Textbox(label="Bot")
    # api_name is a keyword argument of the event listener; assigning it to the
    # returned event object after the fact does not register the endpoint.
    txt.submit(generate_reply, inputs=txt, outputs=out, api_name="generate_reply")

demo.queue()
demo.launch(share=True, show_api=True, show_error=True)
```
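
Because `show_api=True` and the listener has an `api_name`, the endpoint can also be called programmatically with `gradio_client`. A minimal sketch; the Space ID here is a hypothetical placeholder, not the actual deployment:

```python
from gradio_client import Client

# "user/falcon-rw-1b-chat" is an assumed Space ID; substitute your own,
# or pass the share/localhost URL printed by demo.launch().
client = Client("user/falcon-rw-1b-chat")

# Named endpoints are addressed with a leading slash.
reply = client.predict("Hello there!", api_name="/generate_reply")
print(reply)
```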