hydffgg committed on
Commit 20122ba · verified · 1 Parent(s): cbdfd1b

Update app.py

Files changed (1)
  1. app.py +55 -57
app.py CHANGED
@@ -1,71 +1,69 @@
  import gradio as gr
- from transformers import AutoTokenizer, AutoModelForCausalLM
  import torch
- import os

- MODEL_ID = "google/gemma-3-270m-it"
- HF_TOKEN = os.getenv("HF_TOKEN")

- tokenizer = None
- model = None

- def load_model():
-     global tokenizer, model
-     if tokenizer is None or model is None:
-         tokenizer = AutoTokenizer.from_pretrained(
-             MODEL_ID,
-             token=HF_TOKEN
-         )
-         model = AutoModelForCausalLM.from_pretrained(
-             MODEL_ID,
-             token=HF_TOKEN,
-             torch_dtype=torch.float32,
-             low_cpu_mem_usage=True
-         )

- def respond(message, history):
-     load_model()

-     prompt = "<bos>"
-     for user, bot in history:
-         prompt += (
-             "<start_of_turn>user\n"
-             f"{user}\n"
-             "<end_of_turn>\n"
-             "<start_of_turn>model\n"
-             f"{bot}\n"
-             "<end_of_turn>\n"
-         )

-     prompt += (
-         "<start_of_turn>user\n"
-         f"{message}\n"
-         "<end_of_turn>\n"
-         "<start_of_turn>model\n"
-     )

-     inputs = tokenizer(
-         prompt,
-         return_tensors="pt",
-         truncation=True,
-         max_length=2048
-     )

-     outputs = model.generate(
-         **inputs,
-         max_new_tokens=200,
-         temperature=0.7,
-         top_p=0.9,
-         do_sample=True,
-         eos_token_id=tokenizer.eos_token_id
-     )

-     decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     reply = decoded.split("<start_of_turn>model")[-1].strip()
      return reply

- gr.ChatInterface(
-     fn=respond,
      title="🤖 Gemma3 270M Cloud Chat",
-     description="Gemma3 270M running free in the cloud on Hugging Face Spaces"
- ).launch(server_name="0.0.0.0")

  import gradio as gr
  import torch
+ import re
+ from transformers import AutoTokenizer, AutoModelForCausalLM

+ # ======================
+ # Load model
+ # ======================
+ MODEL_ID = "google/gemma-3-270m"

+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_ID,
+     torch_dtype=torch.float32,
+     device_map="cpu"
+ )

+ # ======================
+ # Clean output (fix stray characters)
+ # ======================
+ def clean_output(text: str) -> str:
+     # Cut the text at the first valid end-of-sentence mark
+     match = re.match(r"^[\s\S]*?[.!?\n]", text)
+     if match:
+         return match.group(0).strip()
+     return text.strip()

+ # ======================
+ # Chat function
+ # ======================
+ def chat(message, history):
+     prompt = "<bos><start_of_turn>user\n" + message + "\n<end_of_turn>\n<start_of_turn>model\n"

+     inputs = tokenizer(prompt, return_tensors="pt")

+     with torch.no_grad():
+         outputs = model.generate(
+             **inputs,
+             max_new_tokens=200,
+             temperature=0.6,
+             top_p=0.9,
+             do_sample=True,
+             eos_token_id=tokenizer.eos_token_id,
+             pad_token_id=tokenizer.eos_token_id
+         )

+     decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

+     reply = decoded.split("model")[-1].strip()
+     reply = clean_output(reply)

      return reply

+ # ======================
+ # UI
+ # ======================
+ demo = gr.ChatInterface(
+     fn=chat,
      title="🤖 Gemma3 270M Cloud Chat",
+     description="Gemma3 270M running free in the cloud on Hugging Face Spaces",
+     examples=["hi", "explain what AI is", "hello world in python"],
+     submit_btn="Send",
+     retry_btn="Retry",
+     undo_btn="Undo",
+     clear_btn="Clear"
+ )

+ if __name__ == "__main__":
+     demo.launch()
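
Note on the new chat path: the rewritten chat() concatenates the Gemma turn markers by hand and recovers the reply by splitting the decoded text on the bare word "model", which can truncate any answer that happens to contain that word. Below is a minimal alternative sketch, not part of the commit: it assumes the loaded checkpoint ships a chat template (true for the instruction-tuned google/gemma-3-270m-it, an assumption for the base google/gemma-3-270m) and reuses the tokenizer/model objects defined above; chat_via_template is a hypothetical helper name.

import torch

def chat_via_template(message, history):
    # Hypothetical variant of chat(): let the tokenizer's chat template emit the
    # <start_of_turn>/<end_of_turn> markers instead of building the string by hand.
    messages = [{"role": "user", "content": message}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    )

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the newly generated tokens, so a reply containing the word
    # "model" is not cut short by string splitting.
    new_tokens = output_ids[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()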