# src/config/prompt.py
# (Hugging Face file-viewer residue removed: "Upload 38 files" by ABAO77,
#  commit 24dcddf verified.)
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from typing import Literal
from src.config.llm import llm_2_0 as llm
from typing import Optional
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource.

    Structured-output schema for ``route_chain``: the LLM must pick exactly
    one of the two datasources for the incoming question.
    """

    # "vectorstore" -> question is on-topic and should go through retrieval;
    # "casual_convo" -> small talk / off-topic, answered without retrieval.
    datasource: Literal["vectorstore", "casual_convo"] = Field(
        ...,
        description="Given a user question choose to route it to casual_convo or a vectorstore.",
    )
class ExtractFilter(BaseModel):
    """Extract job level and job title from user question.

    Structured-output schema for ``extract_filter_chain``; presumably the
    extracted values are used as metadata filters on retrieval — confirm
    against the retriever code (not visible here).
    """

    # Free-form strings; no closed vocabulary is enforced for either field.
    job_level: str = Field(description="The level of the job the user is asking about.")
    job_title: str = Field(description="The title of the job the user is asking about.")
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents.

    Structured-output schema for ``grade_documents_chain``.
    """

    # Constrained to the two expected values (was a bare ``str``) so the
    # structured-output schema rejects anything other than "yes"/"no",
    # matching the convention already used by ``GradeHallucinations``.
    binary_score: Literal["yes", "no"] = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )
class GenerateAnswer(BaseModel):
    """Generate an answer based on the provided documents.

    Structured-output schema for ``gen_answer_rag_chain``.
    """

    answer: str = Field(description="Generated answer based on the provided documents.")
    # Defaults to None (was implicitly required): the description tells the
    # model to leave it unset when no relevant document exists, so the schema
    # must not force the field to be present — otherwise an omitted value
    # fails validation.
    selected_document_index: Optional[list[int]] = Field(
        default=None,
        description="Index of the selected document. If not have relevant document then leave it None",
    )
class GradeHallucinations(BaseModel):
    """Binary score for grounding of generation answer in provided facts."""

    # NOTE(review): this schema is paired with ``grade_answer_prompt`` (which
    # asks whether the answer resolves the question, not whether it is
    # grounded) in ``grade_hallucinations_chain`` below — confirm intent.
    binary_score: Literal["yes", "no"] = Field(
        description="Whether the answer is grounded in the provided facts. 'yes' if the answer is supported by facts, 'no' if the answer contains information not present or contradicting the given facts"
    )
class HighlightExplain(BaseModel):
    """Explain the highlight terms in a concise and easy to understand manner."""

    # Single free-text field returned by ``highlight_explain_chain``.
    explanation: str = Field(description="Explanation of the highlight terms.")
# System instruction for the router; {topic} is pre-bound via .partial below.
_route_system = """You are an expert at routing the user's question to vectorstore or casual_convo in {topic} platform.
choose vectorstore if the question is related to {topic} and casual_convo otherwise. \n
example:
user: Hi are you [this is a random question not related to {topic} so route to casual_convo] : casual_convo
user: Calculate,... [this question is related to education, system information so route to vectorstore] : vectorstore"""

# Routing prompt: system rules, then prior turns, then the current question.
route_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _route_system),
        ("placeholder", "{history}"),
        ("human", "{question}"),
    ]
).partial(topic="education")
# System instruction for the query re-writer; {topic} pre-bound via .partial.
_rewrite_system = """You a question re-writer that converts an input question to a better version that is optimized
for vectorstore retrieval, and very concise. Look at the input and try to reason about the underlying semantic intent/meaning. The input can also be a
follow up question, look at the chat history to re-write the question to include necessary info from the chat history to a better version that is optimized
for vectorstore retrieval without any other info needed. [the topic of convo will be generally around {topic} topic. You need to re-write query base on history and include keyword related to this topic"""

# Rewrites a (possibly follow-up) question into a standalone retrieval query.
re_write_query_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _rewrite_system),
        ("placeholder", "{history}"),
        ("human", "{question}"),
    ]
).partial(topic="education")
# Messages for the metadata-extraction prompt (paired with ExtractFilter).
_extract_messages = [
    (
        "system",
        """You are an expert at extracting metadata from the user's question about {topic} topic and using it to filter the retrieved documents.
""",
    ),
    ("placeholder", "{history}"),
    ("human", "{question}"),
]
# {topic} is pre-bound to "education".
extract_filter_prompt = ChatPromptTemplate(_extract_messages).partial(topic="education")
# Document-relevance grading prompt, consumed by ``grade_documents_chain``
# with the ``GradeDocuments`` schema. The original system message also asked
# for a 0-1 relevance score, but the structured output only has a binary
# 'yes'/'no' field, so that instruction was dead weight and has been removed
# (the doubled word in "binary score 'yes' or 'no' score" is fixed too).
check_relevant_document_prompt = ChatPromptTemplate(
    [
        (
            "system",
            """
You are a grader assessing relevance of a retrieved document to a user question.
If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant.
It does not need to be a stringent test. The goal is to filter out erroneous retrievals.
Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question.
""",
        ),
        (
            "human",
            "Retrieved document: \n\n {document} \nvs\n User question: {question}",
        ),
    ]
)
# System instruction for the RAG answer generator. Note: the fallback string
# 'không có tài liệu liên quan' ("no relevant documents") is runtime output
# and is kept verbatim.
_rag_system = """You are chat bot related to {topic}. You are asked to generate an answer based on the provided documents.
Your are given context related to job description of a job position. If the context not provided, you just say 'không có tài liệu liên quan'
Answer in {language} language.
Context:
```
{context}
```
"""

# {topic}/{language} are pre-bound; {context} and {question} per invocation.
gen_answer_rag_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _rag_system),
        (
            "human",
            """
Question: {question}
""",
        ),
    ]
).partial(topic="education", language="vietnamese")
# Answer-grading prompt: did the generation resolve the user's question?
# Fixes vs. original: stray quote ("Yes'" -> "'Yes'"), "doesnt" -> "doesn't",
# and the system instruction that was duplicated verbatim inside the human
# turn has been removed from the human turn.
grade_answer_prompt = ChatPromptTemplate(
    [
        (
            "system",
            """You are a grader assessing whether an answer addresses / resolves a question \n
Give a binary score 'yes' or 'no'. 'Yes' means that the answer resolves the question.
If the LLM Generation is saying that it doesn't know or not sure or stating to keep the questions relevant to topic, grade it as 'yes'.""",
        ),
        (
            "human",
            "User question: \n\n {question} \n\n LLM generation: {generation}",
        ),
    ]
)
# Casual-conversation prompt. The system text is intentionally Vietnamese
# ("You are a chatbot answering user questions based on the education-related
# conversation") and is runtime content — kept verbatim.
_casual_messages = [
    (
        "system",
        """Bạn là chatbot giải đáp câu hỏi của người dùng dựa trên đoạn hội thoại liên quan đến lĩnh vực giáo dục
""",
    ),
    ("placeholder", "{history}"),
    ("human", "{question}"),
]
gen_normal_answer_prompt = ChatPromptTemplate.from_messages(_casual_messages)
# Highlight-explanation prompt, consumed by ``highlight_explain_chain`` with
# the ``HighlightExplain`` schema. Typos fixed in the system message
# ("a expert" -> "an expert", "higlight" -> "highlight").
# NOTE(review): unlike the other prompts, {domain} is NOT pre-bound with
# .partial — it must be supplied at invoke time; confirm callers do so.
highlight_explain_prompt = ChatPromptTemplate(
    [
        (
            "system",
            """You are an expert in explaining the highlight terms in {domain} domain.
You are given the highlight terms, adjacent paragraphs of the highlight terms.
Your task is to explain the highlight terms in a concise and easy to understand manner.
You are also given the user question.
""",
        ),
        (
            "human",
            """
User question: {question}
Highlight terms: {highlight_terms}
Adjacent paragraphs: {adjacent_paragraphs}
""",
        ),
    ]
)
# --- Runnable chains: prompt | llm (optionally with structured output) ---

# Routes a question to "vectorstore" or "casual_convo".
route_chain = route_prompt | llm.with_structured_output(RouteQuery)
# Rewrites the question for retrieval; plain-text LLM output (no schema).
transform_query_chain = re_write_query_prompt | llm
# Extracts job_level / job_title for metadata filtering.
extract_filter_chain = extract_filter_prompt | llm.with_structured_output(ExtractFilter)
# Grades a retrieved document as relevant ('yes'/'no').
grade_documents_chain = check_relevant_document_prompt | llm.with_structured_output(
    GradeDocuments
)
# Generates the RAG answer plus the indices of the documents it used.
gen_answer_rag_chain = gen_answer_rag_prompt | llm.with_structured_output(
    GenerateAnswer
)
# Casual-conversation path; plain-text output.
gen_normal_answer_chain = gen_normal_answer_prompt | llm
# NOTE(review): pairs the answer-resolution prompt (grade_answer_prompt) with
# the grounding schema (GradeHallucinations); the prompt and the schema
# disagree about what 'yes' means — confirm which behavior is intended.
grade_hallucinations_chain = grade_answer_prompt | llm.with_structured_output(
    GradeHallucinations
)
# Explains highlighted terms; {domain} must be provided at invoke time.
highlight_explain_chain = highlight_explain_prompt | llm.with_structured_output(
    HighlightExplain
)