Spaces:
Paused
Paused
| # from langchain.llms import LlamaCpp | |
| # from langchain import prompts, LLMChain | |
| # from langchain.callbacks.manager import CallbackManager | |
| # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler | |
| # MODEL_PATH = "D:/FDM/llama-2-7b-chat.Q2_K.gguf" | |
| # # TODO: | |
| # # Install the necessary libraries (langchain is already installed here) | |
| # # and make sure you are running a recent Python, version 3.8.1 or newer. | |
| # # This has been tried both with the CPU only and with the GPU. | |
| # # 1. Create a function to generate prompt | |
| # # 2. Create a function to load Llama-2 | |
| # def create_prompt() -> prompts.PromptTemplate: | |
| # """ | |
| # Generates prompt template | |
| # :param: Takes in no parameters | |
| # :return: a prompt template | |
| # """ | |
| # # Prompt obtained from langchain docs | |
| # _DEFAULT_TEMPLATE: str = """Assistant is a large language model trained by Meta. | |
| # Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand. | |
| # Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics. | |
| # Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist. | |
| # Human: {human_input} | |
| # Assistant:""" | |
| # prompt: prompts.PromptTemplate = prompts.PromptTemplate( | |
| # input_variables=["human_input"], template=_DEFAULT_TEMPLATE) | |
| # return prompt | |
| # def load_model() -> LlamaCpp: | |
| # # Callbacks support token-wise streaming | |
| # # Verbose is required to pass to the callback manager | |
| # callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]) | |
| # # n_gpu_layers - determines how many layers of the model are offloaded to your GPU. | |
| # # n_batch - how many tokens are processed in parallel. | |
| # # Change this value based on your model and your GPU VRAM pool. | |
| # n_gpu_layers = 40 | |
| # # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU. | |
| # n_batch = 512 | |
| # Llama_llm = LlamaCpp( | |
| # model_path=MODEL_PATH, | |
| # temperature=0.75, | |
| # max_tokens=2000, | |
| # n_gpu_layers=n_gpu_layers, | |
| # n_batch=n_batch, | |
| # top_p=1, | |
| # callback_manager=callback_manager, | |
| # verbose=True, | |
| # ) | |
| # return Llama_llm | |
| # def reponse(promt): | |
| # llm = load_model() | |
| # model_prompt = promt | |
| # # model_prompt: str = """ | |
| # # Based from these students' feedback: I don't clearly understand the lesson, topic was not allinged with the course, Sir is always late, no proper use of learning materials. \n | |
| # # Please generate a very short recommendation to the instructor. Make it in sentence type and in English language only. | |
| # # """ | |
| # response = llm(prompt=model_prompt) | |
| # # print(response) | |
| # return response | |
| # from langchain.llms import LlamaCpp | |
| # from langchain import prompts, LLMChain | |
| # from langchain.callbacks.manager import CallbackManager | |
| # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler | |
| # MODEL_PATH = "D:/FDM/llama-2-7b-chat.Q2_K.gguf" | |
| # Llama_llm = None # Global variable to hold the LlamaCpp instance | |
| # llm_chain = None # Global variable to hold the LLMChain instance | |
| # def create_prompt(human_input: str) -> prompts.PromptTemplate: | |
| # """ | |
| # Generates a custom prompt template. | |
| # :param human_input: The user's input that will be included in the prompt. | |
| # :return: A prompt template with the specified user input. | |
| # """ | |
| # template = f"""Assistant is a large language model trained by Meta. | |
| # Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. | |
| # Human: {human_input} | |
| # Assistant:""" | |
| # prompt = prompts.PromptTemplate(input_variables=["human_input"], template=template) | |
| # return prompt | |
| # def load_model() -> LLMChain: | |
| # global Llama_llm, llm_chain | |
| # if Llama_llm is None: | |
| # # Create the LlamaCpp instance only if it's not already loaded | |
| # print("Loading LlamaCpp instance...") | |
| # callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]) | |
| # Llama_llm = LlamaCpp( | |
| # model_path=MODEL_PATH, | |
| # callback_manager=callback_manager, | |
| # verbose=True, | |
| # ) | |
| # if llm_chain is None: | |
| # # Create the LLMChain instance only if it's not already loaded | |
| # print("Creating LLMChain instance...") | |
| # prompt = create_prompt("What is python programming?") | |
| # llm_chain = LLMChain(llm=Llama_llm, prompt=prompt) | |
| # return llm_chain | |
| # # Load the model | |
| # print("Attempting to load the model...") | |
| # llm_chain = load_model() | |
| # # Now you can use llm_chain to make predictions without reloading the model | |
| # question = "What is python programming?" | |
| # custom_prompt = create_prompt(question) | |
| # response = llm_chain.run({'human_input': question}) | |
| # print("Model response:", response) | |
| from langchain.llms.llamacpp import LlamaCpp | |
| from langchain.chains import LLMChain | |
| from langchain.callbacks.manager import CallbackManager | |
| from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler | |
| from langchain.prompts import PromptTemplate | |
# Path of the model file passed to LlamaCpp as `model_path`.
MODEL_PATH = "D:/FDM/llama-2-7b-chat.Q2_K.gguf"
Llama_llm = None # Global variable to hold the LlamaCpp instance
llm_chain = None # Global variable to hold the LLMChain instance
# Number of model layers offloaded to the GPU; tune to the available VRAM.
n_gpu_layers = 40
# Number of tokens processed in parallel; should be between 1 and n_ctx.
n_batch = 512
def create_prompt_template(human_input: str = "") -> PromptTemplate:
    """
    Build the pass-through prompt template used by the chain.

    The template text is exactly ``"{human_input}"``: it adds no system
    message or role markers, so the value supplied for ``human_input`` when
    the template is formatted becomes the whole prompt.

    :param human_input: Unused. The value is NOT embedded in the returned
        template; the parameter is kept (and is now optional) only so that
        existing callers passing a string keep working.
    :return: A ``PromptTemplate`` whose single input variable is
        ``human_input``.
    """
    return PromptTemplate(
        input_variables=["human_input"],  # must be a list of variable names
        template="{human_input}",
    )
def load_model() -> LLMChain:
    """
    Return the shared ``LLMChain``, creating it on first use.

    Both the ``LlamaCpp`` model and the ``LLMChain`` wrapping it are stored
    in module-level globals (``Llama_llm`` and ``llm_chain``). Each is built
    only while its global is still ``None``, so repeated calls hand back the
    already-created chain instead of constructing it again.

    :return: The module-level ``LLMChain`` instance.
    """
    global Llama_llm, llm_chain

    if Llama_llm is None:
        # First call: the model has not been instantiated yet.
        print("Loading LlamaCpp instance...")
        streaming_manager = CallbackManager([StreamingStdOutCallbackHandler()])
        llama_settings = {
            "model_path": MODEL_PATH,
            "temperature": 0.75,
            "max_tokens": 2000,
            "n_gpu_layers": n_gpu_layers,
            "n_batch": n_batch,
            "top_p": 1,
            "callback_manager": streaming_manager,
            "verbose": True,
        }
        Llama_llm = LlamaCpp(**llama_settings)

    if llm_chain is None:
        # First call: wrap the model in a chain using the pass-through prompt.
        print("Creating LLMChain instance...")
        llm_chain = LLMChain(llm=Llama_llm, prompt=create_prompt_template(""))

    return llm_chain
# Build the chain once at import time so langchain_input() can reuse the
# module-level llm_chain instead of creating it on every call.
print("Attempting to load the model...")
llm_chain = load_model()
def langchain_input(prompt):
    """
    Run ``prompt`` through the module-level ``llm_chain`` and return the result.

    :param prompt: Value handed unchanged to ``llm_chain.run``.
    :return: Whatever ``llm_chain.run`` returns for that prompt.
    """
    # Trace line so each call is visible in the console output.
    print("langchain_input............")
    return llm_chain.run(prompt)