SimaFarazi's picture
add comments to simple app stream
feb1823
raw
history blame contribute delete
829 Bytes
from transformers import AutoTokenizer
from langchain_huggingface import HuggingFaceEndpoint
import os
from prompts import (
raw_prompt
)
import schemas
# Load the tokenizer for the model: we only need it to read eos_token,
# which tells the endpoint where to stop generating.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Remote Hugging Face inference endpoint serving the Llama-3 model.
# NOTE(review): reads HF_TOKEN directly from the environment and raises
# KeyError at import time if it is unset — confirm that fail-fast is intended.
llm = HuggingFaceEndpoint(
    repo_id=model_id,  # reuse model_id so tokenizer and endpoint can never desync
    huggingfacehub_api_token=os.environ['HF_TOKEN'],
    max_new_tokens=512,  # cap generated output at 512 tokens (not words)
    stop_sequences=[tokenizer.eos_token],  # stop at the model's end-of-sequence token
    streaming=True,  # emit tokens incrementally instead of one final response
)

# Build a chain by piping the prompt object into the HF endpoint.
# Attach the UserQuestion data model to the chain to validate input data.
simple_chain = (raw_prompt | llm).with_types(input_type=schemas.UserQuestion)