"""## RetrievalQA with LLaMA 2-70B on Together API""" # import libraries import os import together import logging from typing import Any, Dict, List, Mapping, Optional from pydantic import Extra, Field, root_validator from langchain.llms.base import LLM from langchain.vectorstores import Chroma from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.chains import RetrievalQA from langchain.document_loaders import TextLoader from langchain.document_loaders import PyPDFLoader from langchain.document_loaders import DirectoryLoader from langchain.embeddings import HuggingFaceInstructEmbeddings from langchain.chains.question_answering import load_qa_chain import gradio as gr # set your API key os.environ["TOGETHER_API_KEY"] = "6216ce36aadcb06c35436e7d6bbbc18b354d8140f6e805db485d70ecff4481d0" together.api_key = os.environ["TOGETHER_API_KEY"] model= "togethercomputer/llama-2-70b-chat" #model = "meta-llama/Llama-3-70b-chat-hf" # set llama model try: together.Models.start(model) except: print("404 Error") class TogetherLLM(LLM): """Together large language models.""" model: str = "togethercomputer/llama-2-70b-chat" """model endpoint to use""" together_api_key: str = os.environ["TOGETHER_API_KEY"] """Together API key""" temperature: float = 0.7 """What sampling temperature to use.""" max_tokens: int = 512 """The maximum number of tokens to generate in the completion.""" class Config: extra = Extra.forbid @property def _llm_type(self) -> str: """Return type of LLM.""" return "together" def _call( self, prompt: str, **kwargs: Any, ) -> str: """Call to Together endpoint.""" together.api_key = self.together_api_key output = together.Complete.create(prompt, model=self.model, max_tokens=self.max_tokens, temperature=self.temperature, ) text = output['output']['choices'][0]['text'] return text # Load and process the text files loader = TextLoader('resume_data.txt') # loader = DirectoryLoader('./folder/', glob="./*.pdf", loader_cls=PyPDFLoader) documents = loader.load() model= "togethercomputer/llama-2-70b-chat" # model = "meta-llama/Llama-3-70b-chat-hf" # Make a chain llm = TogetherLLM( model= model, temperature = 0.1, max_tokens = 1024) #prompt def prompt_add(question): prompt = f""" this data is about my resume. answer as if person responding. do not ask question back. i have information about my education, work experience, skills, and contact information. do not generate random response always answer from give text context only. if answer is not available respond, 'No information available' Answer to given question: {question} """ return prompt # chain chain = load_qa_chain(llm=llm, chain_type="stuff") query1= "what is this story about?" chain.run(input_documents=documents, question=prompt_add(query1)) # gradio description = "This is a chatbot application based on the llama2 70B model. Simply type an input to get started with chatting.\n Note : Bot can generate random response sometimes" examples = [["what is your contact number?"], ["where you are currently working?"]] #def greet(query1, history): # return chain.run(input_documents=documents, question="answer as if person responding. do not ask question back. \n Question: "+query1) list_greet = ["hi","hey","yo"] def greet(query1,history): try: if query1.lower() in list_greet: return "Hello. How can I assist you today?" 
# gradio chat UI
description = (
    "This is a chatbot application based on the LLaMA 2-70B model. "
    "Simply type an input to get started with chatting.\n"
    "Note: the bot can sometimes generate random responses."
)
examples = [["What is your contact number?"], ["Where are you currently working?"]]

# def greet(query1, history):
#     return chain.run(input_documents=documents,
#                      question="answer as if person responding. do not ask question back. \n Question: " + query1)

list_greet = ["hi", "hey", "yo"]


def greet(query1, history):
    """Reply to simple greetings directly; otherwise run the QA chain over the resume."""
    try:
        if query1.lower() in list_greet:
            return "Hello. How can I assist you today?"
        else:
            return chain.run(input_documents=documents, question=prompt_add(query1).lower())
    except Exception:
        return "API error"


gr.ChatInterface(
    greet,
    title="Chat with my Bot",
    description=description,
    examples=examples,
).launch(debug=True)