Spaces:
Sleeping
Sleeping
#%%
import os
# from dotenv import load_dotenv
# load_dotenv('../../.env')
from langchain_core.prompts import PromptTemplate
from typing import List
from transformers import AutoTokenizer
from huggingface_hub import login
import models

# NOTE(review): importing this module has side effects — it authenticates
# against the Hugging Face Hub and downloads/loads the Llama-3 tokenizer.
# Requires the HF_TOKEN environment variable (raises KeyError if unset).
login(os.environ['HF_TOKEN'])
# Tokenizer is used only for its chat template (see apply_chat_template in
# format_prompt below), not for actual tokenization.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
def format_prompt(prompt) -> PromptTemplate:
    """Wrap a raw prompt string in the model-specific chat template.

    Builds a two-turn conversation (fixed system role + the given prompt as
    the user turn), renders it as plain text with the Llama-3 tokenizer's
    chat template, and returns the result as a langchain PromptTemplate.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": prompt},
    ]
    # tokenize=False -> return the rendered template as a string rather than
    # token ids; add_generation_prompt=True -> append the assistant-turn
    # marker so the model knows where to start generating its reply.
    rendered = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )
    return PromptTemplate.from_template(rendered)
def format_chat_history(messages: List[models.Message]):
    """Render a list of Message objects as a plain-text chat transcript.

    Messages are ordered chronologically (oldest first) and each line is
    formatted as ``[YYYY-MM-DD HH:MM:SS] <type>: <text>``.
    """
    transcript = []
    # Oldest message first so the transcript reads top-to-bottom in time.
    for msg in sorted(messages, key=lambda m: m.timestamp):
        stamp = msg.timestamp.strftime("%Y-%m-%d %H:%M:%S")
        transcript.append('[{}] {}: {}'.format(stamp, msg.type, msg.message))
    return '\n'.join(transcript)
def format_context(docs: List[str]):
    """Concatenate retrieved document chunks into a single context string.

    Chunks are separated by a blank line so the model can distinguish them
    inside the rag_prompt's {context} slot.
    """
    separator = '\n\n'
    return separator.join(docs)
# Raw pass-through prompt: just the user's question, no extra framing.
prompt = "{question}"

# history_prompt captures the running conversation plus the follow-up
# question so the model can answer with full context.
# Placeholders: {chat_history} and {question}.
# FIX: corrected the typo "follwoing" -> "following" in the instruction
# text that is sent to the model.
history_prompt: str = """
Given the following conversation provide a helpful answer to the follow up question.
Chat History:
{chat_history}
Follow Up Question: {question}
helpful answer:
"""
# standalone_prompt condenses the chat history plus a follow-up question into
# a single self-contained question (useful as a retrieval query, since the
# rephrased question carries all necessary context on its own).
# Placeholders: {chat_history} and {question}.
standalone_prompt: str = """
Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
"""
# rag_prompt grounds the final answer strictly in the retrieved context.
# Placeholders: {context} (output of format_context over the search results)
# and {standalone_question} (the rephrased question).
rag_prompt: str = """
Answer the question based only on the following context:
{context}
Question: {standalone_question}
"""
# Instantiate the templates used by the chains. Two flavors exist:
#   - raw_prompt: a plain LangChain template (no chat formatting);
#   - *_formatted: the same text wrapped in the Llama-3 chat template
#     via format_prompt, ready to be sent to the instruct model.
# FIX: removed commented-out dead code that duplicated the live
# raw_prompt / raw_prompt_formatted assignments below.

# i) raw prompt
raw_prompt = PromptTemplate.from_template(prompt)
# ii) formatted prompts (chat-templated)
raw_prompt_formatted = format_prompt(prompt)
history_prompt_formatted = format_prompt(history_prompt)
standalone_prompt_formatted = format_prompt(standalone_prompt)
rag_prompt_formatted = format_prompt(rag_prompt)