TSjB committed on
Commit
1661dbe
·
verified ·
1 Parent(s): 84a5b64

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +26 -0
  2. app.py +135 -0
  3. requirements.txt +7 -0
README.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: QM-4B Chat
3
+ emoji: 💬
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.9.1
8
+ app_file: app.py
9
+ pinned: false
10
+ python_version: 3.12
11
+ ---
12
+
13
+ # Къарачай-Малкъар чат-бот / Карачаево-балкарский чат-бот
14
+
15
+ Чат-бот на основе модели [TSjB/QM-4B](https://huggingface.co/TSjB/QM-4B) для карачаево-балкарского языка.
16
+
17
+ ## Возможности
18
+
19
+ - Генерация текста на карачаево-балкарском языке
20
+ - Ответы на вопросы
21
+ - Поддержка русского и английского языков
22
+
23
+ ## Разработчики
24
+
25
+ - [Богдан Теунаев](https://t.me/bogdan_tewunalany)
26
+ - [Али Берберов](https://t.me/ali_berberov)
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Karachay-Balkar chat bot built on the TSjB/QM-4B model.
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datetime import datetime
import pytz

# Constants
MODEL_LLM_PATH = 'TSjB/QM-4B'  # Hugging Face model repo id
# Default system prompt (Karachay-Balkar): tells the assistant to answer
# briefly, correctly and clearly. Runtime string — must stay untranslated.
DEFAULT_SYSTEM_PROMPT = "Сен къарачай-малкъар тилде болушлукъчуса. Соруўлагъа къысха, тюз эм ачыкъ джуўабла бер."

# Timezone used only for log timestamps below.
TZ = pytz.timezone('Europe/Moscow')

# Load model once at import time (module-level side effect): the hosting
# process stays alive, so this runs a single time per worker.
print(f"Loading model {MODEL_LLM_PATH}...")
tokenizer_llm = AutoTokenizer.from_pretrained(MODEL_LLM_PATH, trust_remote_code=True)
model_llm = AutoModelForCausalLM.from_pretrained(
    MODEL_LLM_PATH,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,  # halves memory vs fp32; assumes bf16-capable hardware — TODO confirm
    device_map="auto",           # let accelerate place weights on available devices
)
model_llm.eval()  # inference mode: disables dropout etc.
print("Model loaded!")
27
def chat_llm(message, history, system_prompt, temperature, max_tokens, top_p, repetition_penalty):
    """Generate one chat reply with the TSjB/QM-4B model.

    Args:
        message: The user's current message text.
        history: Prior turns in Gradio "messages" format — a list of
            ``{"role": ..., "content": ...}`` dicts.
        system_prompt: Optional system prompt; skipped when blank.
        temperature: Sampling temperature; values <= 0 are clamped to 0.01
            because sampling requires a positive temperature.
        max_tokens: Maximum number of new tokens to generate.
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: Penalty factor against repeated tokens.

    Returns:
        The decoded model response as a string ("" for blank input).
    """
    # Ignore empty / whitespace-only input.
    if not message.strip():
        return ""

    # Build the message list: optional system prompt, then history, then
    # the current user turn.
    messages = []
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})

    # Add history (already in role/content dict form).
    for msg in history:
        messages.append({"role": msg["role"], "content": msg["content"]})

    # Add current message
    messages.append({"role": "user", "content": message})

    print(f'Chat input: {message} - Time: {datetime.now(tz=TZ)}')

    # Apply the tokenizer's chat template. Newer (Qwen-style) tokenizers
    # accept `enable_thinking`; older ones raise TypeError, so retry without.
    try:
        try:
            text = tokenizer_llm.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
                enable_thinking=False
            )
        except TypeError:
            text = tokenizer_llm.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True
            )
    except Exception as e:
        # Last-resort fallback: generate from the raw message. This loses
        # system prompt and history but keeps the bot responsive.
        print(f"Error applying chat template: {e}")
        text = message

    # Tokenize, truncating to the context budget used by this app.
    inputs = tokenizer_llm(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=4096
    ).to(model_llm.device)

    # Some tokenizers emit token_type_ids, which causal LMs don't accept.
    if 'token_type_ids' in inputs:
        inputs.pop('token_type_ids')

    # Robustness fix: many causal-LM tokenizers define no pad token, in
    # which case pad_token_id is None — fall back to EOS for generation.
    pad_id = tokenizer_llm.pad_token_id
    if pad_id is None:
        pad_id = tokenizer_llm.eos_token_id

    # Generate without tracking gradients (inference only).
    with torch.no_grad():
        outputs = model_llm.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            # do_sample=True requires temperature > 0; clamp to 0.01.
            temperature=float(temperature) if float(temperature) > 0 else 0.01,
            top_p=float(top_p),
            do_sample=True,
            repetition_penalty=float(repetition_penalty),
            pad_token_id=pad_id,
            eos_token_id=tokenizer_llm.eos_token_id,
        )

    # Decode only the newly generated tokens (skip the echoed prompt).
    response = tokenizer_llm.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)

    print(f'Chat output: {response[:100]}... - Time: {datetime.now(tz=TZ)}')

    return response
98
+
99
# UI: Gradio Blocks app — header, a collapsed settings accordion, and a
# ChatInterface wired to chat_llm. All user-facing strings are runtime
# text (Karachay-Balkar / Russian) and must stay as-is.
with gr.Blocks(title="Къарачай-Малкъар ушакъ-бот", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 💬 Къарачай-Малкъар ушакъ-бот")
    gr.Markdown("Чат-бот на основе модели [TSjB/QM-4B](https://huggingface.co/TSjB/QM-4B) для карачаево-балкарского языка.")

    # Generation settings, collapsed by default; these components are
    # passed to chat_llm as additional inputs on every message.
    with gr.Accordion("Настройки / Settings", open=False):
        chat_system_prompt = gr.Textbox(
            lines=2,
            value=DEFAULT_SYSTEM_PROMPT,
            label="Системаны промпту / System prompt"
        )

        with gr.Row():
            chat_temperature = gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="Temperature")
            chat_max_tokens = gr.Slider(minimum=16, maximum=2048, value=256, step=16, label="Max tokens")
        with gr.Row():
            chat_top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-p")
            chat_repetition_penalty = gr.Slider(minimum=1.0, maximum=2.0, value=1.2, step=0.1, label="Repetition penalty")

    # ChatInterface forwards (message, history, *additional_inputs) to
    # chat_llm; the order here must match chat_llm's signature.
    gr.ChatInterface(
        chat_llm,
        additional_inputs=[chat_system_prompt, chat_temperature, chat_max_tokens, chat_top_p, chat_repetition_penalty],
        examples=[
            ["Салам! Сен кимсе?"],
            ["Къарачай-Малкъар тилни юсюнден айт."],
            ["Минги Таў деген не болгъанды?"],
            ["Привет! Расскажи о себе на русском."],
        ],
    )

    gr.Markdown("""
    ---
    **Разработчики:** [Богдан Теунаев](https://t.me/bogdan_tewunalany), [Али Берберов](https://t.me/ali_berberov)
    """)

# Launch the app only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ transformers==4.47.1
2
+ tokenizers==0.21.0
3
+ accelerate
4
+ huggingface_hub==0.27.1
5
+ torch
6
+ gradio==5.9.1
7
+ pytz