nixaut-codelabs committed on
Commit
6fe1066
·
verified ·
1 Parent(s): b14c8d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -33
app.py CHANGED
@@ -9,6 +9,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStream
9
  MODEL_REPO = "daniel-dona/gemma-3-270m-it"
10
  LOCAL_DIR = os.path.join(os.getcwd(), "local_model")
11
 
 
12
  os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
13
  os.environ.setdefault("OMP_NUM_THREADS", str(os.cpu_count() or 1))
14
  os.environ.setdefault("MKL_NUM_THREADS", os.environ["OMP_NUM_THREADS"])
@@ -47,37 +48,44 @@ model = AutoModelForCausalLM.from_pretrained(
47
  )
48
  model.eval()
49
 
50
- def build_prompt(message, history, system_message, max_ctx_tokens=1024):
51
- msgs = [{"role": "system", "content": system_message}]
52
- for u, a in history:
53
- if u:
54
- msgs.append({"role": "user", "content": u})
55
- if a:
56
- msgs.append({"role": "assistant", "content": a})
57
- msgs.append({"role": "user", "content": message})
58
- while True:
59
- chat_template = """{% for m in messages %}
60
- {{ m['role'] }}: {{ m['content'] }}
61
- {% endfor %}
62
- Assistant:"""
63
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  text = tokenizer.apply_chat_template(
65
  msgs,
66
  chat_template=chat_template,
67
  tokenize=False,
68
  add_generation_prompt=True
69
  )
 
70
 
71
-
72
- if len(tokenizer(text, add_special_tokens=False).input_ids) <= max_ctx_tokens:
73
- return text
74
- for i in range(1, len(msgs)):
75
- if msgs[i]["role"] != "system":
76
- del msgs[i:i+2]
77
- break
78
-
79
- def respond_stream(message, history, system_message, max_tokens, temperature, top_p):
80
- text = build_prompt(message, history, system_message)
81
  inputs = tokenizer([text], return_tensors="pt").to(model.device)
82
  do_sample = bool(temperature and temperature > 0.0)
83
  gen_kwargs = dict(
@@ -93,10 +101,12 @@ def respond_stream(message, history, system_message, max_tokens, temperature, to
93
  streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True, skip_prompt=True)
94
  except TypeError:
95
  streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
 
96
  thread = threading.Thread(
97
  target=model.generate,
98
  kwargs={**inputs, **{k: v for k, v in gen_kwargs.items() if v is not None}, "streamer": streamer}
99
  )
 
100
  partial_text = ""
101
  token_count = 0
102
  start_time = None
@@ -108,22 +118,25 @@ def respond_stream(message, history, system_message, max_tokens, temperature, to
108
  start_time = time.time()
109
  partial_text += chunk
110
  token_count += 1
111
- yield partial_text
112
  finally:
113
  thread.join()
114
- end_time = time.time() if start_time is not None else time.time()
115
- duration = max(1e-6, end_time - start_time) if start_time else 0.0
116
- tps = (token_count / duration) if duration > 0 else 0.0
117
- yield partial_text + f"\n\n⚡ Hız: {tps:.2f} token/sn"
 
118
 
119
  demo = gr.ChatInterface(
120
  respond_stream,
 
121
  additional_inputs=[
122
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
123
- gr.Slider(minimum=1, maximum=2048, value=256, step=1, label="Max new tokens"),
124
- gr.Slider(minimum=0.0, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
125
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
126
- ]
 
 
127
  )
128
 
129
  if __name__ == "__main__":
 
9
  MODEL_REPO = "daniel-dona/gemma-3-270m-it"
10
  LOCAL_DIR = os.path.join(os.getcwd(), "local_model")
11
 
12
+ # CPU optimizasyonları
13
  os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
14
  os.environ.setdefault("OMP_NUM_THREADS", str(os.cpu_count() or 1))
15
  os.environ.setdefault("MKL_NUM_THREADS", os.environ["OMP_NUM_THREADS"])
 
48
  )
49
  model.eval()
50
 
51
+ # Çok dilli moderasyon system prompt
52
+ MODERATION_SYSTEM_PROMPT = (
53
+ "You are a multilingual content moderation classifier. "
54
+ "You analyze the user's message in any language and decide if it is safe or unsafe. "
55
+ "Rules: If the message contains hate speech, harassment, sexual content involving minors, "
56
+ "extreme violence, self-harm encouragement, or other unsafe material, respond with exactly 'unsafe'. "
57
+ "If it is acceptable and safe, respond with exactly 'safe'. "
58
+ "Do not explain, do not add anything else, only output 'safe' or 'unsafe'."
59
+ )
60
+
61
+ def build_prompt(message, max_ctx_tokens=512):
62
+ msgs = [
63
+ {"role": "system", "content": MODERATION_SYSTEM_PROMPT},
64
+ {"role": "user", "content": message}
65
+ ]
66
+ chat_template = """{% for m in messages %}
67
+ {{ m['role'] }}: {{ m['content'] }}
68
+ {% endfor %}
69
+ Assistant:"""
70
+ text = tokenizer.apply_chat_template(
71
+ msgs,
72
+ chat_template=chat_template,
73
+ tokenize=False,
74
+ add_generation_prompt=True
75
+ )
76
+ # Token sınırını aşarsa kısalt
77
+ while len(tokenizer(text, add_special_tokens=False).input_ids) > max_ctx_tokens and len(msgs) > 2:
78
+ msgs.pop(1)
79
  text = tokenizer.apply_chat_template(
80
  msgs,
81
  chat_template=chat_template,
82
  tokenize=False,
83
  add_generation_prompt=True
84
  )
85
+ return text
86
 
87
+ def respond_stream(message, history, max_tokens, temperature, top_p):
88
+ text = build_prompt(message)
 
 
 
 
 
 
 
 
89
  inputs = tokenizer([text], return_tensors="pt").to(model.device)
90
  do_sample = bool(temperature and temperature > 0.0)
91
  gen_kwargs = dict(
 
101
  streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True, skip_prompt=True)
102
  except TypeError:
103
  streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
104
+
105
  thread = threading.Thread(
106
  target=model.generate,
107
  kwargs={**inputs, **{k: v for k, v in gen_kwargs.items() if v is not None}, "streamer": streamer}
108
  )
109
+
110
  partial_text = ""
111
  token_count = 0
112
  start_time = None
 
118
  start_time = time.time()
119
  partial_text += chunk
120
  token_count += 1
121
+ yield partial_text.strip()
122
  finally:
123
  thread.join()
124
+
125
+ end_time = time.time() if start_time else time.time()
126
+ duration = max(1e-6, end_time - start_time)
127
+ tps = token_count / duration if duration > 0 else 0.0
128
+ yield partial_text.strip() + f"\n\n⚡ Speed: {tps:.2f} token/s"
129
 
130
  demo = gr.ChatInterface(
131
  respond_stream,
132
+ chatbot=False,
133
  additional_inputs=[
134
+ gr.Slider(minimum=1, maximum=16, value=4, step=1, label="Max new tokens"),
135
+ gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.1, label="Temperature"),
 
136
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
137
+ ],
138
+ title="Multilingual Moderation Classifier",
139
+ description="Enter any text in any language. The model will output only 'safe' or 'unsafe'."
140
  )
141
 
142
  if __name__ == "__main__":