cody82 committed on
Commit
4499474
·
verified ·
1 Parent(s): 101f40d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -41
app.py CHANGED
@@ -1,50 +1,44 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
3
 
4
- client = InferenceClient("cody82/innopolis_bot_model")
5
 
6
- def respond(
7
- message,
8
- history: list[tuple[str, str]],
9
- system_message,
10
- max_tokens,
11
- temperature,
12
- top_p,
13
- ):
14
- # Собираем историю в текст
15
- full_prompt = system_message.strip() + "\n"
16
 
17
- for user_msg, bot_msg in history:
18
- if user_msg:
19
- full_prompt += f"User: {user_msg}\n"
20
- if bot_msg:
21
- full_prompt += f"Bot: {bot_msg}\n"
22
 
23
- full_prompt += f"User: {message}\nBot:"
 
 
 
 
 
 
24
 
25
- # Генерируем ответ от модели
26
- response = ""
27
- for token in client.text_generation(
28
- prompt=full_prompt,
29
- max_new_tokens=max_tokens,
30
- temperature=temperature,
31
- top_p=top_p,
32
- stream=True,
33
- ):
34
- response += token
35
- yield response
 
 
 
 
 
 
36
 
37
- demo = gr.ChatInterface(
38
- respond,
39
- additional_inputs=[
40
- gr.Textbox(value="You are a helpful assistant.", label="System message"),
41
- gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
42
- gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
43
- gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-p"),
44
- ],
45
- title="Innopolis Bot",
46
- description="Чат с кастомной моделью cody82/innopolis_bot_model",
47
- )
48
 
49
  if __name__ == "__main__":
50
- demo.launch()
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
 
5
+ model_id = "cody82/innopolis_bot_model"
6
 
7
+ # Загружаем токенизатор и модель
8
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
9
+ model = AutoModelForCausalLM.from_pretrained(model_id)
 
 
 
 
 
 
 
10
 
11
+ # Убираем cuda, т.к. у нас CPU
12
+ device = torch.device("cpu")
13
+ model = model.to(device)
 
 
14
 
15
+ def respond(message, history):
16
+ history = history or []
17
+ # Объединяем историю и текущий ввод
18
+ full_input = ""
19
+ for user, bot in history:
20
+ full_input += f"User: {user}\nAssistant: {bot}\n"
21
+ full_input += f"User: {message}\nAssistant:"
22
 
23
+ inputs = tokenizer(full_input, return_tensors="pt").to(device)
24
+ outputs = model.generate(
25
+ **inputs,
26
+ max_new_tokens=200,
27
+ do_sample=True,
28
+ temperature=0.7,
29
+ top_p=0.95,
30
+ pad_token_id=tokenizer.eos_token_id
31
+ )
32
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
33
+
34
+ # Извлекаем только ответ ассистента (после последнего "Assistant:")
35
+ if "Assistant:" in response:
36
+ response = response.split("Assistant:")[-1].strip()
37
+
38
+ history.append((message, response))
39
+ return response, history
40
 
41
+ chat = gr.ChatInterface(fn=respond, title="Innopolis Chatbot")
 
 
 
 
 
 
 
 
 
 
42
 
43
  if __name__ == "__main__":
44
+ chat.launch()