Spaces:

uyen13
/

chatbot

Sleeping

uyen13 committed on May 13, 2025

Commit

436b7d4

verified ·

1 Parent(s): 711e09a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,25 +11,24 @@ import torch
 # Load the LLM (this revision loads Falcon-7B, despite the earlier "FLAN-T5" label)
 @st.cache_resource
 def load_llm():
-    model_name = "tiiuae/falcon-7b"  # Replace with the name of the model you choose
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
-        torch_dtype=torch.bfloat16,  # Reduce memory use if a supporting GPU is available
-        trust_remote_code=True,
-        device_map="auto"  # Automatically place the model on GPU/CPU
     )
     pipe = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    max_new_tokens=256,  # Maximum number of newly generated tokens
-    temperature=0.7,
-    top_p=0.95,
-    repetition_penalty=1.15,
-    do_sample=True,
-    eos_token_id=tokenizer.eos_token_id,  # Stop generating at the tokenizer's end-of-sequence token
-    truncation=True  # Allow truncation when the input is too long
     )
     return HuggingFacePipeline(pipeline=pipe)

 # Load FLAN-T5 model
 @st.cache_resource
 def load_llm():
+    model_name = "google/flan-t5-xl"  # <-- Đã thay bằng FLAN-T5
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
+        torch_dtype=torch.float32,  # flan-t5 không hỗ trợ bfloat16 trên CPU
+        device_map="auto"
     )
     pipe = pipeline(
+        "text2text-generation",  # <-- Chú ý loại pipeline này dành cho T5
+        model=model,
+        tokenizer=tokenizer,
+        max_new_tokens=256,
+        temperature=0.7,
+        top_p=0.95,
+        repetition_penalty=1.15,
+        do_sample=True
     )
     return HuggingFacePipeline(pipeline=pipe)