VcRlAgent committed on
Commit
6efd79e
·
1 Parent(s): 21a7404

fix causal

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -48,7 +48,7 @@ def load_llamaindex_stack(model_id: str, max_new_tokens: int, temperature: float
48
  # Tiny, fast sentence-transformers model for embeddings
49
  embed = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
50
 
51
-
52
  tok = AutoTokenizer.from_pretrained(model_id)
53
  mdl = AutoModelForSeq2SeqLM.from_pretrained(model_id)
54
  text2text = pipeline(
@@ -58,20 +58,20 @@ def load_llamaindex_stack(model_id: str, max_new_tokens: int, temperature: float
58
  max_new_tokens=max_new_tokens,
59
  temperature=float(temperature)
60
  )
61
-
62
 
63
  # Wrap the same tiny HF model for LlamaIndex
64
-
65
- """
66
  llm = HuggingFaceLLM(
67
  model_name=model_id,
68
- tokenizer_name=model_id,
 
69
  context_window=2048,
70
  generate_kwargs={"max_new_tokens": max_new_tokens, "temperature": temperature},
71
  device_map="cpu",
72
  )
73
- """
74
- llm = HuggingFaceLLM(pipeline=text2text)
75
 
76
  Settings.embed_model = embed
77
  Settings.llm = llm
 
48
  # Tiny, fast sentence-transformers model for embeddings
49
  embed = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
50
 
51
+ """
52
  tok = AutoTokenizer.from_pretrained(model_id)
53
  mdl = AutoModelForSeq2SeqLM.from_pretrained(model_id)
54
  text2text = pipeline(
 
58
  max_new_tokens=max_new_tokens,
59
  temperature=float(temperature)
60
  )
61
+ """
62
 
63
  # Wrap the same tiny HF model for LlamaIndex
64
+
 
65
  llm = HuggingFaceLLM(
66
  model_name=model_id,
67
+ tokenizer_name=model_id,
68
+ model_cls=AutoModelForSeq2SeqLM,
69
  context_window=2048,
70
  generate_kwargs={"max_new_tokens": max_new_tokens, "temperature": temperature},
71
  device_map="cpu",
72
  )
73
+
74
+ #llm = HuggingFaceLLM(pipeline=text2text)
75
 
76
  Settings.embed_model = embed
77
  Settings.llm = llm