bkaplan committed · verified · Commit 2cecba6 · 1 Parent(s): 57fa342

Update app.py

Files changed (1):
  1. app.py (+23 -26)

app.py CHANGED
@@ -1,37 +1,34 @@
  import gradio as gr
- from huggingface_hub import InferenceClient
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch

- # Create the InferenceClient
- client = InferenceClient("bkaplan/MRL1")
+ # Load the model
+ model_name = "bkaplan/MRL1"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)

  def respond(message, history, system_message, max_tokens, temperature, top_p):
-     # Prepare the message list
-     messages = []
-
-     # Add the system message
-     if system_message:
-         messages.append({"role": "system", "content": system_message})
-
-     # Add the past messages
-     for user, assistant in history:
-         if user:
-             messages.append({"role": "user", "content": user})
-         if assistant:
-             messages.append({"role": "assistant", "content": assistant})
-
-     # Add the new message
-     messages.append({"role": "user", "content": message})
-
      try:
-         # Use the text_generation method instead of chat
-         response = client.text_generation(
-             prompt=message,  # use the latest message directly
-             max_new_tokens=max_tokens,
-             temperature=temperature,
+         # Prepare the input
+         input_text = f"System: {system_message}\nUser: {message}\nAssistant:"
+
+         # Tokenize
+         inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
+
+         # Generation parameters
+         outputs = model.generate(
+             **inputs,
+             max_length=max_tokens,
+             temperature=temperature,
              top_p=top_p,
-             # You can add extra parameters if needed
+             num_return_sequences=1,
+             do_sample=True
          )
+
+         # Decode the response
+         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
          yield response
+
      except Exception as e:
          yield f"Hata oluştu: {str(e)}"
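
Two caveats in the new version are worth flagging. `max_length` bounds prompt and completion together, so a long prompt can leave little or no room for generated text; `max_new_tokens` bounds only the newly generated part. Also, for decoder-only models `generate()` returns the prompt tokens followed by the completion, so decoding `outputs[0]` echoes the prompt back to the user, and note that the new prompt format drops the `history` argument entirely. A minimal sketch of the first two fixes (not part of this commit; it reuses the names defined above):

    # Sketch, not part of the commit: bound only new tokens, strip the prompt
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_tokens,  # counts only generated tokens, unlike max_length
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)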
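
The diff shows only the `respond` generator; the Gradio wiring sits outside the hunk. For context, Space templates with this signature typically pass the extra arguments through `gr.ChatInterface`'s `additional_inputs`; a minimal sketch under that assumption (widget labels and defaults are illustrative, not taken from the commit):

    # Sketch, not part of the commit: typical UI wiring for this respond() signature
    demo = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(value="You are a helpful assistant.", label="System message"),
            gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        ],
    )

    if __name__ == "__main__":
        demo.launch()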