hotmemeh committed on
Commit
c1cc47f
·
verified ·
1 Parent(s): 144f336

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -36
app.py CHANGED
@@ -1,48 +1,30 @@
1
  import gradio as gr
2
- import torch
3
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
 
5
- MODEL_NAME = "darkc0de/XortronCriminalComputingConfig"
 
 
 
6
 
7
- print(f"Loading model: {MODEL_NAME}")
8
-
9
- # Load tokenizer & model
10
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
11
-
12
- # device_map="auto" lets it use GPU if available, otherwise CPU (warning: very slow on CPU)
13
- model = AutoModelForCausalLM.from_pretrained(
14
- MODEL_NAME,
15
- device_map="auto",
16
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
17
- low_cpu_mem_usage=True,
18
- )
19
-
20
- generator = pipeline(
21
- "text-generation",
22
- model=model,
23
- tokenizer=tokenizer,
24
- device=0 if torch.cuda.is_available() else -1,
25
- )
26
-
27
- # Streaming response
28
  def respond(message, history):
29
- output = generator(
30
- message,
 
31
  max_new_tokens=256,
32
  do_sample=True,
33
  temperature=0.7,
34
- truncation=True,
35
- )[0]["generated_text"]
36
-
37
- for i in range(0, len(output), 20):
38
- yield {"role": "assistant", "content": output[: i + 20]}
39
 
40
- # Build Gradio chat
41
- chat = gr.ChatInterface(
42
  fn=respond,
43
- type="messages",
44
- chatbot=gr.Chatbot(height=600, show_copy_button=True, type="messages"),
 
 
45
  )
46
 
47
  if __name__ == "__main__":
48
- chat.launch()
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
 
3
 
4
+ # load the Xortron model
5
+ MODEL_ID = "darkc0de/XortronCriminalComputingConfig"
6
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
7
+ model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  def respond(message, history):
10
+ inputs = tokenizer(message, return_tensors="pt")
11
+ outputs = model.generate(
12
+ **inputs,
13
  max_new_tokens=256,
14
  do_sample=True,
15
  temperature=0.7,
16
+ top_p=0.9,
17
+ )
18
+ reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
19
+ return reply
 
20
 
21
+ demo = gr.ChatInterface(
 
22
  fn=respond,
23
+ type="messages", # avoids that deprecation warning
24
+ chatbot=gr.Chatbot(height=600, show_copy_button=True),
25
+ textbox=gr.Textbox(placeholder="Chat with Xortron...", container=False, scale=7),
26
+ title="Xortron Chat",
27
  )
28
 
29
  if __name__ == "__main__":
30
+ demo.launch()