SimaFarazi's picture
add comments to simple app stream
feb1823
raw
history blame contribute delete
829 Bytes
from transformers import AutoTokenizer
from langchain_huggingface import HuggingFaceEndpoint
import os
from prompts import (
raw_prompt
)
import schemas
# Load the tokenizer for the model: we only need it to read eos_token,
# which tells the endpoint where to stop generating.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Remote Hugging Face inference endpoint serving the Llama-3 model.
# NOTE(review): reads HF_TOKEN directly from the environment and raises
# KeyError at import time if it is unset — confirm that fail-fast is intended.
llm = HuggingFaceEndpoint(
    repo_id=model_id,  # reuse model_id so tokenizer and endpoint can never desync
    huggingfacehub_api_token=os.environ['HF_TOKEN'],
    max_new_tokens=512,  # cap generated output at 512 tokens (not words)
    stop_sequences=[tokenizer.eos_token],  # stop at the model's end-of-sequence token
    streaming=True,  # emit tokens incrementally instead of one final response
)

# Build a chain by piping the prompt object into the HF endpoint.
# Attach the UserQuestion data model to the chain to validate input data.
simple_chain = (raw_prompt | llm).with_types(input_type=schemas.UserQuestion)