huggingfacelogin

- .gitignore: +1 -1
- src/streamlit_app.py: +3 -3
.gitignore CHANGED

@@ -2,4 +2,4 @@ todo.txt
 /data
 /airflow
 .env
-/src/secrets.toml
+/src/.streamlit/secrets.toml
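The path change matters because Streamlit only auto-loads secrets from a `.streamlit/` directory (the project-local `.streamlit/secrets.toml` or the global `~/.streamlit/secrets.toml`); a bare `src/secrets.toml` is never read, so the new ignore rule tracks the location the app can actually use. A minimal sketch of how such a file is consumed, assuming an `HF_TOKEN` key (the key name is carried over from the app code below, not from this diff):

```python
import streamlit as st

# Streamlit parses .streamlit/secrets.toml automatically; st.secrets then
# behaves like a read-only mapping over the TOML contents.
hf_token = st.secrets.get("HF_TOKEN")  # None if the key is absent
```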
src/streamlit_app.py CHANGED
@@ -11,10 +11,10 @@ from langchain_community.llms import Ollama
 import os
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from huggingface_hub import InferenceClient
 
 
-
-os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
+HF_TOKEN = os.environ.get("HF_TOKEN")
 
 # ----------------------
 system_prompt = (
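The removed line assigned `os.environ["HUGGINGFACEHUB_API_TOKEN"]` from an `hf_token` name that is defined nowhere in the hunk, which would raise a NameError at import time; the replacement reads the token from the environment instead, which is how Hugging Face Spaces exposes secrets configured in the Space settings. Because `os.environ.get` returns None when the variable is unset, a fail-fast guard keeps the failure readable; a minimal sketch (the error message is illustrative, not from the app):

```python
import os
import streamlit as st

# On Hugging Face Spaces, secrets set in the Space settings are injected as
# environment variables, so the token is read rather than hard-coded.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    st.error("HF_TOKEN is not set; gated models such as Llama-2 cannot be downloaded.")
    st.stop()
```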
@@ -47,7 +47,7 @@ def load_llm():
     # load the tokenizer and model on cpu/gpu
     model_name = "meta-llama/Llama-2-7b-chat-hf"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
+    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto", use_auth_token=HF_TOKEN)
     pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=256)
     return HuggingFacePipeline(pipeline=pipe)
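Two caveats on the new argument: `use_auth_token` is deprecated in recent transformers releases in favor of `token`, and Llama-2 is a gated repo, so the `AutoTokenizer.from_pretrained` download needs credentials too (recent huggingface_hub versions do pick up the `HF_TOKEN` environment variable automatically, which covers that case on Spaces). A sketch of the equivalent calls on a current transformers version; the version threshold is approximate:

```python
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

HF_TOKEN = os.environ.get("HF_TOKEN")
model_name = "meta-llama/Llama-2-7b-chat-hf"

# `token` replaces the deprecated `use_auth_token` (transformers >= ~4.34);
# passing it explicitly also authenticates the gated tokenizer download.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto", token=HF_TOKEN
)
```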
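One loose end: the newly imported `InferenceClient` is never used in the hunks shown. If the intent is to eventually call the hosted Inference API instead of loading the 7B weights inside the Space, a minimal sketch could look like the following (whether that is the plan is an assumption; the model name and generation parameters are simply carried over from the local pipeline):

```python
import os
from huggingface_hub import InferenceClient

HF_TOKEN = os.environ.get("HF_TOKEN")

# Hypothetical remote counterpart to the local pipeline: the hosted API
# serves the model, so the Space needs no GPU memory for weights.
client = InferenceClient(model="meta-llama/Llama-2-7b-chat-hf", token=HF_TOKEN)
reply = client.text_generation("Hello!", max_new_tokens=256)
print(reply)
```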