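"""Chain and model-loading helpers for a Stack Overflow support assistant.

Provides loaders for embedding models and chat LLMs (OpenAI, Ollama, AWS
Bedrock, Google Generative AI, HuggingFace sentence-transformers), an
LLM-only chat chain, a RAG chain backed by a Neo4j vector index over
Stack Overflow questions and answers, and a helper that drafts a new
question ("ticket") in the style of highly ranked existing questions.
"""
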
from langchain_openai import OpenAIEmbeddings
from langchain_ollama import OllamaEmbeddings
from langchain_aws import BedrockEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain_ollama import ChatOllama
from langchain_aws import ChatBedrock
from langchain_community.vectorstores import Neo4jVector
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from typing import List, Any
from utils import BaseLogger, extract_title_and_question
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Bedrock model-id prefixes; load_llm() matches them with str.startswith().
AWS_MODELS = (
    "ai21.jamba-instruct-v1:0",
    "amazon.titan",
    "anthropic.claude",
    "cohere.command",
    "meta.llama",
    "mistral.mi",
)


def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config={}):
    if embedding_model_name == "ollama":
        embeddings = OllamaEmbeddings(
            base_url=config["ollama_base_url"], model="llama2"
        )
        dimension = 4096
        logger.info("Embedding: Using Ollama")
    elif embedding_model_name == "openai":
        embeddings = OpenAIEmbeddings()
        dimension = 1536
        logger.info("Embedding: Using OpenAI")
    elif embedding_model_name == "aws":
        embeddings = BedrockEmbeddings()
        dimension = 1536
        logger.info("Embedding: Using AWS")
    elif embedding_model_name == "google-genai-embedding-001":
        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        dimension = 768
        logger.info("Embedding: Using Google Generative AI Embeddings")
    else:
        embeddings = HuggingFaceEmbeddings(
            model_name="all-MiniLM-L6-v2", cache_folder="/embedding_model"
        )
        dimension = 384
        logger.info("Embedding: Using SentenceTransformer")
    return embeddings, dimension
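
# Usage sketch (illustrative, not part of the original module): the Ollama base URL
# below is an assumed local default; any unrecognized embedding_model_name falls back
# to the HuggingFace sentence-transformer branch above.
#
#   embeddings, dimension = load_embedding_model(
#       "ollama", config={"ollama_base_url": "http://localhost:11434"}
#   )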


def load_llm(llm_name: str, logger=BaseLogger(), config={}):
    if llm_name in ["gpt-4", "gpt-4o", "gpt-4-turbo"]:
        logger.info("LLM: Using GPT-4")
        return ChatOpenAI(temperature=0, model_name=llm_name, streaming=True)
    elif llm_name == "gpt-3.5":
        logger.info("LLM: Using GPT-3.5")
        return ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", streaming=True)
    elif llm_name == "claudev2":
        logger.info("LLM: ClaudeV2")
        return ChatBedrock(
            model_id="anthropic.claude-v2",
            model_kwargs={"temperature": 0.0, "max_tokens_to_sample": 1024},
            streaming=True,
        )
    elif llm_name.startswith(AWS_MODELS):
        logger.info(f"LLM: {llm_name}")
        return ChatBedrock(
            model_id=llm_name,
            model_kwargs={"temperature": 0.0, "max_tokens_to_sample": 1024},
            streaming=True,
        )
    elif len(llm_name):
        logger.info(f"LLM: Using Ollama: {llm_name}")
        return ChatOllama(
            temperature=0,
            base_url=config["ollama_base_url"],
            model=llm_name,
            streaming=True,
            # seed=2,
            top_k=10,  # A higher value (100) will give more diverse answers, while a lower value (10) will be more conservative.
            top_p=0.3,  # A higher value (0.95) will lead to more diverse text, while a lower value (0.5) will generate more focused text.
            num_ctx=3072,  # Sets the size of the context window used to generate the next token.
        )
    logger.info("LLM: Using GPT-3.5")
    return ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", streaming=True)
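
# Usage sketch (illustrative): any non-empty name that is not an OpenAI model,
# "claudev2", or a Bedrock prefix from AWS_MODELS is treated as an Ollama model;
# the base URL here is an assumed local default.
#
#   llm = load_llm("llama2", config={"ollama_base_url": "http://localhost:11434"})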


def configure_llm_only_chain(llm):
    # LLM only response
    template = """
    You are a helpful assistant that helps a support agent with answering programming questions.
    If you don't know the answer, just say that you don't know, you must not make up an answer.
    """
    system_message_prompt = SystemMessagePromptTemplate.from_template(template)
    human_template = "{question}"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
    chat_prompt = ChatPromptTemplate.from_messages(
        [system_message_prompt, human_message_prompt]
    )

    def generate_llm_output(
        user_input: str, callbacks: List[Any], prompt=chat_prompt
    ) -> dict:
        chain = prompt | llm
        answer = chain.invoke(
            {"question": user_input}, config={"callbacks": callbacks}
        ).content
        return {"answer": answer}

    return generate_llm_output
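
# Usage sketch (illustrative): the returned callable takes the user input, a list of
# LangChain callbacks, and an optional prompt override (generate_ticket below passes
# its own prompt), and returns a dict of the form {"answer": ...}.
#
#   answer_fn = configure_llm_only_chain(llm)
#   result = answer_fn("How do I reverse a list in Python?", callbacks=[])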


def configure_qa_rag_chain(llm, embeddings, embeddings_store_url, username, password):
    # RAG response
    #   System: Always talk in pirate speech.
    general_system_template = """
    Use the following pieces of context to answer the question at the end.
    The context contains question-answer pairs and their links from Stackoverflow.
    You should prefer information from accepted or more upvoted answers.
    Make sure to rely on information from the answers and not on questions to provide accurate responses.
    When you find a particular answer in the context useful, make sure to cite it in the answer using the link.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    ----
    {summaries}
    ----
    Each answer you generate should contain a section at the end with links to the
    Stackoverflow questions and answers you found useful, which are described under the Source value.
    You can only use links to StackOverflow questions that are present in the context and always
    add links to the end of the answer in the style of citations.
    Generate concise answers with a references section of links to
    relevant StackOverflow questions only at the end of the answer.
    """
    general_user_template = "Question:```{question}```"
    messages = [
        SystemMessagePromptTemplate.from_template(general_system_template),
        HumanMessagePromptTemplate.from_template(general_user_template),
    ]
    qa_prompt = ChatPromptTemplate.from_messages(messages)

    qa_chain = load_qa_with_sources_chain(
        llm,
        chain_type="stuff",
        prompt=qa_prompt,
    )

    # Vector + Knowledge Graph response
    kg = Neo4jVector.from_existing_index(
        embedding=embeddings,
        url=embeddings_store_url,
        username=username,
        password=password,
        database="neo4j",  # neo4j by default
        index_name="stackoverflow",  # vector by default
        text_node_property="body",  # text by default
        retrieval_query="""
    WITH node AS question, score AS similarity
    CALL { WITH question
        MATCH (question)<-[:ANSWERS]-(answer)
        WITH answer
        ORDER BY answer.is_accepted DESC, answer.score DESC
        WITH collect(answer)[..2] AS answers
        RETURN reduce(str='', answer IN answers | str +
                '\n### Answer (Accepted: ' + answer.is_accepted +
                ' Score: ' + answer.score + '): ' + answer.body + '\n') AS answerTexts
    }
    RETURN '##Question: ' + question.title + '\n' + question.body + '\n'
        + answerTexts AS text, similarity AS score, {source: question.link} AS metadata
    ORDER BY similarity ASC // so that best answers are the last
    """,
    )

    kg_qa = RetrievalQAWithSourcesChain(
        combine_documents_chain=qa_chain,
        retriever=kg.as_retriever(search_kwargs={"k": 2}),
        reduce_k_below_max_tokens=False,
        max_tokens_limit=3375,
    )
    return kg_qa
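
# Usage sketch (illustrative): the URL and credentials are placeholders for a running
# Neo4j instance that already has the "stackoverflow" vector index. The chain expects
# a "question" key and returns an answer together with the sources it cited.
#
#   rag_chain = configure_qa_rag_chain(
#       llm, embeddings, "bolt://localhost:7687", "neo4j", "password"
#   )
#   result = rag_chain.invoke({"question": "How do I create a vector index in Neo4j?"})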


def generate_ticket(neo4j_graph, llm_chain, input_question):
    # Get high ranked questions
    records = neo4j_graph.query(
        "MATCH (q:Question) RETURN q.title AS title, q.body AS body ORDER BY q.score DESC LIMIT 3"
    )
    questions = []
    for question in records:
        questions.append((question["title"], question["body"]))

    # Ask LLM to generate new question in the same style
    questions_prompt = ""
    for i, question in enumerate(questions, start=1):
        questions_prompt += f"{i}. \n{question[0]}\n----\n\n"
        questions_prompt += f"{question[1][:150]}\n\n"
        questions_prompt += "----\n\n"

    gen_system_template = f"""
    You're an expert in formulating high quality questions.
    Formulate a question in the same style and tone as the following example questions.
    {questions_prompt}
    ---
    Don't make anything up, only use information in the following question.
    Return a title for the question, and the question post itself.
    Return format template:
    ---
    Title: This is a new title
    Question: This is a new question
    ---
    """
    # we need jinja2 since the questions themselves contain curly braces
    system_prompt = SystemMessagePromptTemplate.from_template(
        gen_system_template, template_format="jinja2"
    )
    chat_prompt = ChatPromptTemplate.from_messages(
        [
            system_prompt,
            SystemMessagePromptTemplate.from_template(
                """
                Respond in the following template format or you will be unplugged.
                ---
                Title: New title
                Question: New question
                ---
                """
            ),
            HumanMessagePromptTemplate.from_template("{question}"),
        ]
    )
    llm_response = llm_chain(
        f"Here's the question to rewrite in the expected format: ```{input_question}```",
        [],
        chat_prompt,
    )
    new_title, new_question = extract_title_and_question(llm_response["answer"])
    return (new_title, new_question)
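
# Usage sketch (illustrative): neo4j_graph would be a graph client exposing .query()
# (e.g. a LangChain Neo4jGraph) connected to the same database, and llm_chain the
# callable returned by configure_llm_only_chain above.
#
#   new_title, new_question = generate_ticket(
#       neo4j_graph, configure_llm_only_chain(llm), "Why is my Cypher query slow?"
#   )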