Sam-Oliveira committed on
Commit
d0a0b7a
·
1 Parent(s): f2f3755

change triton cache

Browse files
Files changed (2) hide show
  1. src/streamlit_app.py +1 -0
  2. src/summarise.py +5 -3
src/streamlit_app.py CHANGED
@@ -15,6 +15,7 @@ for var in (
15
  "HF_HUB_CACHE",
16
  "TRANSFORMERS_CACHE",
17
  "SENTENCE_TRANSFORMERS_HOME",
 
18
  ):
19
  os.environ[var] = str(CACHE_DIR)
20
 
 
15
  "HF_HUB_CACHE",
16
  "TRANSFORMERS_CACHE",
17
  "SENTENCE_TRANSFORMERS_HOME",
18
+ "TRITON_CACHE_DIR",
19
  ):
20
  os.environ[var] = str(CACHE_DIR)
21
 
src/summarise.py CHANGED
@@ -18,14 +18,16 @@ PROMPT = (
18
 
19
  # ---------------------------------------------------------------------- #
20
  def load_pipe():
 
21
  cache_dir = pathlib.Path(tempfile.gettempdir()) / "hf_cache"
 
22
  model = AutoModelForCausalLM.from_pretrained(
23
  MODEL_NAME,
24
- cache_dir=cache_dir,
25
- #load_in_4bit=True,
26
  device_map="auto"
27
  )
28
- tok = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=cache_dir)
29
  tok.pad_token = tok.eos_token
30
  return pipeline(
31
  "text-generation",
 
18
 
19
  # ---------------------------------------------------------------------- #
20
  def load_pipe():
21
+ # Cache directories are already set up in streamlit_app.py
22
  cache_dir = pathlib.Path(tempfile.gettempdir()) / "hf_cache"
23
+
24
  model = AutoModelForCausalLM.from_pretrained(
25
  MODEL_NAME,
26
+ cache_dir=str(cache_dir),
27
+ load_in_4bit=True,
28
  device_map="auto"
29
  )
30
+ tok = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=str(cache_dir))
31
  tok.pad_token = tok.eos_token
32
  return pipeline(
33
  "text-generation",