ejschwartz committed on
Commit
9b3bbd4
·
1 Parent(s): 3690803
Files changed (1) hide show
  1. app.py +25 -3
app.py CHANGED
@@ -1,12 +1,34 @@
1
  import spaces
2
- from transformers import pipeline
3
  import gradio as gr
 
 
4
 
5
- pipe = pipeline(model="ejschwartz/decaf-v1-22b-4bit")
6
- pipe.model.to("cuda")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  @spaces.GPU(size="xlarge")
9
  def generate(text):
 
10
  return pipe(text)[0]
11
 
12
  demo = gr.Interface(fn=generate, inputs="text", outputs="text")
 
import spaces
from transformers import pipeline, BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import torch
import logging

# Environment diagnostics: record CUDA and bitsandbytes availability up front
# so quantized-load failures are easy to triage from the Space logs.
logging.basicConfig(level=logging.INFO)
logging.info(f"CUDA available: {torch.cuda.is_available()}, CUDA version: {torch.version.cuda}")
try:
    import bitsandbytes as _bnb
    logging.info(f"bitsandbytes version: {_bnb.__version__}")
except Exception as e:
    logging.warning(f"Could not import bitsandbytes: {e}")

# 4-bit quantized load; fp16 compute dtype is required for the 4-bit matmuls
# to run on GPU rather than falling back to fp32.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

_MODEL_ID = "ejschwartz/decaf-v1-22b-4bit"

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    _MODEL_ID,
    device_map="auto",  # let accelerate place the shards
    quantization_config=bnb_config,
)

# Build the generation pipeline from the pre-loaded model/tokenizer instead of
# letting pipeline() re-download and re-load them.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
@spaces.GPU(size="xlarge")
def generate(text):
    """Generate a completion for *text* and return it as a plain string.

    The text-generation pipeline returns a list of dicts of the form
    [{"generated_text": ...}]; the Gradio output component is "text",
    so we must return the string payload, not the whole dict (which
    would otherwise be rendered via str()).
    """
    print(f"Generating text... {text}")
    # Take the first candidate and extract its generated string.
    return pipe(text)[0]["generated_text"]


demo = gr.Interface(fn=generate, inputs="text", outputs="text")