Spaces:
Sleeping
Sleeping
import os

from langchain_huggingface import HuggingFaceEndpoint
from transformers import AutoTokenizer

from prompts import (
    raw_prompt
)
import schemas

# Single source of truth for the model id — used for BOTH the tokenizer and
# the inference endpoint so the two can never drift apart.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# Get tokenizer; required to obtain the model's eos_token for stop sequences.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Instantiate the HuggingFace inference endpoint with the Llama model.
llm = HuggingFaceEndpoint(
    repo_id=model_id,
    huggingfacehub_api_token=os.environ['HF_TOKEN'],  # raises KeyError if unset — fail fast
    max_new_tokens=512,  # response will not exceed 512 tokens
    stop_sequences=[tokenizer.eos_token],  # stop generation at the end-of-sequence token
    streaming=True,
)

# Build a chain by piping the prompt object into the HF endpoint.
# Attach the UserQuestion data model to the chain to validate input data.
simple_chain = (raw_prompt | llm).with_types(input_type=schemas.UserQuestion)