Trinoid committed on
Commit
8dcb47d
·
verified ·
1 Parent(s): fe0d2c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -8
app.py CHANGED
@@ -1,19 +1,13 @@
1
  import gradio as gr
2
  import os
3
  import torch
4
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
5
 
6
  """
7
  Load model and tokenizer directly using transformers
8
  """
9
  model_name = "PlantWisdom/Data_Management_Mistral"
10
 
11
- # Configure quantization for lower memory usage
12
- quantization_config = BitsAndBytesConfig(
13
- load_in_4bit=True,
14
- bnb_4bit_compute_dtype=torch.float16
15
- )
16
-
17
  # Load tokenizer and model
18
  print("Loading tokenizer...")
19
  tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -21,8 +15,9 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
21
  print("Loading model...")
22
  model = AutoModelForCausalLM.from_pretrained(
23
  model_name,
24
- quantization_config=quantization_config,
25
  device_map="auto",
 
26
  )
27
 
28
  def respond(
 
1
  import gradio as gr
2
  import os
3
  import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
 
6
  """
7
  Load model and tokenizer directly using transformers
8
  """
9
  model_name = "PlantWisdom/Data_Management_Mistral"
10
 
 
 
 
 
 
 
11
  # Load tokenizer and model
12
  print("Loading tokenizer...")
13
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
15
  print("Loading model...")
16
  model = AutoModelForCausalLM.from_pretrained(
17
  model_name,
18
+ torch_dtype=torch.float16,
19
  device_map="auto",
20
+ low_cpu_mem_usage=True
21
  )
22
 
23
  def respond(