SoAp9035/turkish_instructions
Viewer • Updated • 51.9k • 169 • 7
How to use Dbmaxwell/gemma3-270m-turkish-instructions with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
# Build a text-generation pipeline backed by the fine-tuned checkpoint.
pipe = pipeline(
    task="text-generation",
    model="Dbmaxwell/gemma3-270m-turkish-instructions",
)

# A conversation is a list of {"role", "content"} messages.
messages = [{"role": "user", "content": "Who are you?"}]
pipe(messages)
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
# Tokenizer and model weights come from the same Hub repository.
checkpoint = "Dbmaxwell/gemma3-270m-turkish-instructions"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

# Single-turn conversation used as the demo input.
messages = [{"role": "user", "content": "Who are you?"}]

# Render the conversation with the chat template, tokenize it to PyTorch
# tensors, and move the encoded batch to the model's device.
encoded = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
)
inputs = encoded.to(model.device)

# Generate at most 40 new tokens after the prompt.
outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
How to use Dbmaxwell/gemma3-270m-turkish-instructions with vLLM:
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "Dbmaxwell/gemma3-270m-turkish-instructions"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
-H "Content-Type: application/json" \
--data '{
"model": "Dbmaxwell/gemma3-270m-turkish-instructions",
"messages": [
{
"role": "user",
"content": "What is the capital of France?"
}
]
}'
docker model run hf.co/Dbmaxwell/gemma3-270m-turkish-instructions
How to use Dbmaxwell/gemma3-270m-turkish-instructions with SGLang:
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
--model-path "Dbmaxwell/gemma3-270m-turkish-instructions" \
--host 0.0.0.0 \
--port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
-H "Content-Type: application/json" \
--data '{
"model": "Dbmaxwell/gemma3-270m-turkish-instructions",
"messages": [
{
"role": "user",
"content": "What is the capital of France?"
}
]
}'
docker run --gpus all \
--shm-size 32g \
-p 30000:30000 \
-v ~/.cache/huggingface:/root/.cache/huggingface \
--env "HF_TOKEN=<secret>" \
--ipc=host \
lmsysorg/sglang:latest \
python3 -m sglang.launch_server \
--model-path "Dbmaxwell/gemma3-270m-turkish-instructions" \
--host 0.0.0.0 \
--port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
-H "Content-Type: application/json" \
--data '{
"model": "Dbmaxwell/gemma3-270m-turkish-instructions",
"messages": [
{
"role": "user",
"content": "What is the capital of France?"
}
]
}'
How to use Dbmaxwell/gemma3-270m-turkish-instructions with Docker Model Runner:
docker model run hf.co/Dbmaxwell/gemma3-270m-turkish-instructions
This model is a fine-tuned version of Google Gemma 3 270M IT, trained on the SoAp9035/turkish_instructions dataset using direct fine-tuning.
Base model: google/gemma-3-270m-it-qat-q4_0-unquantized
Dataset: SoAp9035/turkish_instructions
Formatting: Chat template for google/gemma-3-270m-it-qat-q4_0-unquantized
Evaluation metric: eval_loss
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Hub repository that holds both the tokenizer and the model weights.
MODEL_NAME = "Dbmaxwell/gemma3-270m-turkish-instructions"

# NOTE(review): trust_remote_code=True allows Python code shipped in the model
# repository to run locally. It is presumably unnecessary for a stock Gemma
# checkpoint -- confirm and drop it if so.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

# Fall back to the EOS token for padding when the tokenizer defines no pad
# token. Both assignments belong inside the guard.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "right"

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# Use the GPU when one is available, otherwise run on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()
def generate_response(prompt, max_new_tokens=200):
    """Generate the model's reply to a single user prompt.

    The prompt is wrapped in Gemma chat-turn markup, a continuation is sampled
    from the module-level ``model``, and only the newly generated text is
    decoded and returned.

    Args:
        prompt: User message to send to the model.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The decoded reply, truncated at the first ``<end_of_turn>`` marker and
        stripped of surrounding whitespace.
    """
    formatted_prompt = f"<bos><start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"

    # The template above already spells out <bos>. Disabling automatic special
    # tokens keeps the tokenizer from prepending a second one.
    inputs = tokenizer(
        formatted_prompt,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(device)
    prompt_length = inputs.input_ids.shape[1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs.input_ids,
            # Pass the mask explicitly: the pad token may have been aliased to
            # EOS, in which case it cannot be inferred from the ids.
            attention_mask=inputs.attention_mask,
            max_new_tokens=max_new_tokens,
            temperature=0.3,
            top_p=0.8,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            repetition_penalty=1.2,
            no_repeat_ngram_size=3,
        )

    # Drop the echoed prompt tokens and decode only the continuation.
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    # Cut at the turn terminator in case it survives decoding as plain text.
    return response.split("<end_of_turn>")[0].strip()
# Sample Turkish prompts used to smoke-test the fine-tuned model.
test_prompts = [
    "Merhaba! Ben bir AI asistanım. Sana nasıl yardımcı olabilirim?",
    "Python'da for döngüsü nasıl yazılır?",
    "İstanbul Türkiye'nin en büyük şehridir. Kısa bilgi ver.",
    "Makine öğrenmesi nedir? Basit açıklama yap.",
    "5 artı 3 çarpı 2 kaçtır?",
    "Türkiye'nin başkenti neresidir?",
]

# Print each numbered question with the model's answer, followed by a
# separator line.
for i, prompt in enumerate(test_prompts, 1):
    print(f"\n{i} Question: {prompt}")
    print(f"Answer: {generate_response(prompt, max_new_tokens=100)}")
    print("-" * 60)
Base model
google/gemma-3-270m