File size: 1,804 Bytes
9152169 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import os
from memory import update_memory, check_memory
# Persona instructions: read them from persona.txt when the file exists,
# otherwise fall back to a built-in default persona.
try:
    f = open("persona.txt", "r", encoding="utf-8")
except FileNotFoundError:
    personality = "You are a romantic AI chatbot designed to chat with Moin."
else:
    # The file opened successfully; close it as soon as it has been read.
    with f:
        personality = f.read()
# Hugging Face Hub id of the model to serve.
model_name = "syedmoinms/MoinRomanticBot"  # Correct Hugging Face model path
# model_name = "./MoinRomanticBot"  # Uncomment if using local model folder

# Load the tokenizer and model, authenticating with HF_TOKEN when it is set
# (os.getenv returns None when the variable is absent).
HF_TOKEN = os.getenv("HF_TOKEN")  # Use token if model is private
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        token=HF_TOKEN,
        # NOTE(review): float16 is requested unconditionally; confirm this is
        # acceptable when the app runs on a CPU-only host.
        torch_dtype=torch.float16,
        device_map="auto",
    )
except Exception as e:
    print(f"β Error loading model: {e}")
    # Stop with a non-zero status so the failure is visible to whatever
    # launched the app. The bare exit() helper comes from the `site` module,
    # is intended for interactive use, and would report success (status 0).
    raise SystemExit(1) from e
# Generate a response, consulting memory first
def chatbot(input_text):
    """Return the bot's reply to ``input_text``.

    If ``check_memory`` yields a truthy value for the message, that value is
    returned unchanged. Otherwise the persona text and the message are
    combined into a prompt, the model generates a continuation, and the reply
    is recorded with ``update_memory`` before being returned.

    Args:
        input_text: The user's message.

    Returns:
        The remembered response, or the newly generated reply text without
        the prompt that produced it.
    """
    memory_response = check_memory(input_text)
    if memory_response:
        return memory_response

    prompt = f"{personality}\nMoin: {input_text}\nAI:"
    # Send the inputs to wherever the model was actually placed; the model is
    # loaded with device_map="auto", so guessing "cuda"/"cpu" can mismatch.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        # max_new_tokens bounds only the reply. max_length also counts the
        # prompt tokens, so a long persona would leave no room to answer.
        outputs = model.generate(**inputs, max_new_tokens=150)

    # For a causal LM the generated sequence starts with the prompt tokens;
    # decode only what follows so the persona text is not echoed to the user
    # or written into memory.
    reply_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(reply_tokens, skip_special_tokens=True).strip()
    update_memory(input_text, response)
    return response
# Gradio interface: a single text box in, a single text box out, backed by
# the chatbot function.
iface = gr.Interface(
    fn=chatbot,
    inputs="text",
    outputs="text",
    title="MoinRomanticBot",
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(server_port=7860, server_name="0.0.0.0")