Upload 38 files

- app.py +13 -0
- requirements.txt +23 -0
- src/.DS_Store +0 -0
- src/agents/entrance_eval_agent/flow.py +18 -0
- src/agents/entrance_eval_agent/func.py +4 -0
- src/agents/exercise_gen_agent/flow.py +18 -0
- src/agents/exercise_gen_agent/func.py +4 -0
- src/agents/highlight_explain_agent/__pycache__/flow.cpython-311.pyc +0 -0
- src/agents/highlight_explain_agent/__pycache__/func.cpython-311.pyc +0 -0
- src/agents/highlight_explain_agent/__pycache__/prompt.cpython-311.pyc +0 -0
- src/agents/highlight_explain_agent/flow.py +27 -0
- src/agents/highlight_explain_agent/func.py +25 -0
- src/agents/highlight_explain_agent/prompt.py +39 -0
- src/agents/lesson_rag_agent/flow.py +18 -0
- src/agents/lesson_rag_agent/func.py +4 -0
- src/agents/primary_chatbot/__pycache__/flow.cpython-311.pyc +0 -0
- src/agents/primary_chatbot/__pycache__/func.cpython-311.pyc +0 -0
- src/agents/primary_chatbot/__pycache__/prompt.cpython-311.pyc +0 -0
- src/agents/primary_chatbot/flow.py +157 -0
- src/agents/primary_chatbot/func.py +169 -0
- src/agents/primary_chatbot/prompt.py +177 -0
- src/apis/__pycache__/create_app.cpython-311.pyc +0 -0
- src/apis/create_app.py +23 -0
- src/apis/interfaces/__pycache__/chat_interface.cpython-311.pyc +0 -0
- src/apis/interfaces/chat_interface.py +44 -0
- src/apis/routers/__pycache__/chat_router.cpython-311.pyc +0 -0
- src/apis/routers/chat_router.py +49 -0
- src/config/__pycache__/llm.cpython-311.pyc +0 -0
- src/config/__pycache__/prompt.cpython-311.pyc +0 -0
- src/config/__pycache__/vector_store.cpython-311.pyc +0 -0
- src/config/constant.py +0 -0
- src/config/llm.py +14 -0
- src/config/prompt.py +206 -0
- src/config/vector_store.py +37 -0
- src/utils/__pycache__/helper.cpython-311.pyc +0 -0
- src/utils/__pycache__/logger.cpython-311.pyc +0 -0
- src/utils/helper.py +27 -0
- src/utils/logger.py +65 -0

app.py
ADDED
@@ -0,0 +1,13 @@
+from dotenv import load_dotenv
+
+load_dotenv(override=True)
+
+from src.apis.create_app import create_app, api_router
+import uvicorn
+
+
+app = create_app()
+
+app.include_router(api_router)
+if __name__ == "__main__":
+    uvicorn.run("app:app", host="0.0.0.0", port=3002)
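
A quick smoke test for the entrypoint above — a minimal sketch, assuming the server was started with `python app.py` and that the `.env` file supplies the credentials read by `src/config` (PINECONE_API_KEY plus the Google key that langchain-google-genai expects):

```python
# Hypothetical smoke test; run in a second shell after `python app.py`.
from urllib.request import urlopen

with urlopen("http://localhost:3002/") as resp:  # Swagger UI is mounted at "/"
    print(resp.status)  # expect 200 once the app has booted
```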

requirements.txt
ADDED
@@ -0,0 +1,23 @@
+langgraph
+langchain
+python-dotenv
+motor
+langchain-community
+langchain-mongodb
+pytz
+PyJWT==2.8.0
+python_jose==3.3.0
+pydantic[email]
+jose
+langchain-google-genai
+python-dateutil
+pandas
+openpyxl
+langchain-redis
+redis
+bs4
+duckduckgo-search
+firebase-admin
+python-dotenv
+fastapi
+uvicorn[standard]

src/.DS_Store
ADDED
Binary file (6.15 kB)

src/agents/entrance_eval_agent/flow.py
ADDED
@@ -0,0 +1,18 @@
+from langgraph.graph import StateGraph, START, END
+from src.config.llm import llm_2_0
+from .func import State
+
+
+class PrimaryChatBot:
+    def __init__(self):
+        self.builder = StateGraph(State)
+
+    @staticmethod
+    def routing(state: State):
+        pass
+
+    def node(self):
+        pass
+
+    def edge(self):
+        pass

src/agents/entrance_eval_agent/func.py
ADDED
@@ -0,0 +1,4 @@
+from typing import TypedDict
+
+class State(TypedDict):
+    pass

src/agents/exercise_gen_agent/flow.py
ADDED
@@ -0,0 +1,18 @@
+from langgraph.graph import StateGraph, START, END
+from src.config.llm import llm_2_0
+from .func import State
+
+
+class PrimaryChatBot:
+    def __init__(self):
+        self.builder = StateGraph(State)
+
+    @staticmethod
+    def routing(state: State):
+        pass
+
+    def node(self):
+        pass
+
+    def edge(self):
+        pass

src/agents/exercise_gen_agent/func.py
ADDED
@@ -0,0 +1,4 @@
+from typing import TypedDict
+
+class State(TypedDict):
+    pass

src/agents/highlight_explain_agent/__pycache__/flow.cpython-311.pyc
ADDED
Binary file (2.2 kB)

src/agents/highlight_explain_agent/__pycache__/func.cpython-311.pyc
ADDED
Binary file (1.31 kB)

src/agents/highlight_explain_agent/__pycache__/prompt.cpython-311.pyc
ADDED
Binary file (1.85 kB)

src/agents/highlight_explain_agent/flow.py
ADDED
@@ -0,0 +1,27 @@
+from langgraph.graph import StateGraph, START, END
+from .func import State, highlight_explain
+from langgraph.graph.state import CompiledStateGraph
+
+
+class HighlightExplainAgent:
+    def __init__(self):
+        self.builder = StateGraph(State)
+
+    @staticmethod
+    def routing(state: State):
+        pass
+
+    def node(self):
+        self.builder.add_node("highlight_explain", highlight_explain)
+
+    def edge(self):
+        self.builder.add_edge(START, "highlight_explain")
+        self.builder.add_edge("highlight_explain", END)
+
+    def __call__(self) -> CompiledStateGraph:
+        self.node()
+        self.edge()
+        return self.builder.compile()
+
+
+highlight_workflow = HighlightExplainAgent()()
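
A sketch of invoking the compiled workflow directly, assuming a configured Google API key; the field values are illustrative and borrowed from the `HighlightExplainBody` example further down:

```python
import asyncio

from src.agents.highlight_explain_agent.flow import highlight_workflow

result = asyncio.run(
    highlight_workflow.ainvoke(
        {
            "domain": "Machine Learning",
            "highlight_terms": "overfitting",
            "adjacent_paragraphs": "Overfitting happens when a model performs "
            "well on training data but poorly on unseen data.",
            "question": "What does overfitting mean?",
            "language": "Vietnamese",
        }
    )
)
print(result["explanation"])
```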

src/agents/highlight_explain_agent/func.py
ADDED
@@ -0,0 +1,25 @@
+from typing import TypedDict, AnyStr
+
+from .prompt import highlight_explain_chain
+
+
+class State(TypedDict):
+    domain: AnyStr
+    highlight_terms: AnyStr
+    adjacent_paragraphs: AnyStr
+    question: AnyStr
+    explanation: AnyStr
+    language: AnyStr
+
+
+async def highlight_explain(state: State):
+    response = await highlight_explain_chain.ainvoke(
+        {
+            "domain": state["domain"],
+            "highlight_terms": state["highlight_terms"],
+            "adjacent_paragraphs": state["adjacent_paragraphs"],
+            "question": state["question"],
+            "language": state["language"],
+        }
+    )
+    return {"explanation": response["explanation"]}

src/agents/highlight_explain_agent/prompt.py
ADDED
@@ -0,0 +1,39 @@
+from pydantic import BaseModel, Field
+from langchain_core.prompts import ChatPromptTemplate
+from typing import Literal, Annotated, AnyStr, TypedDict
+from src.config.llm import llm_2_0 as llm
+
+
+class HighlightExplain(TypedDict):
+    """Explain the highlight terms in a concise and easy to understand manner."""
+
+    explanation: Annotated[AnyStr, "The explanation of the highlight terms."]
+
+
+highlight_explain_prompt = ChatPromptTemplate(
+    [
+        (
+            "system",
+            """You are an expert in explaining the highlight terms in the {domain} domain.
+            You are given the highlight terms and the adjacent paragraphs of the highlight terms.
+            Your task is to explain the highlight terms in a concise and easy to understand manner.
+            You are also given the user question.
+
+            The explanation must be primarily in the {language} language, but you can use {domain} domain terms in the explanation.
+            """,
+        ),
+        (
+            "human",
+            """
+            User question: {question}
+            Highlight terms: {highlight_terms}
+            Adjacent paragraphs: {adjacent_paragraphs}
+            """,
+        ),
+    ]
+)
+
+
+highlight_explain_chain = highlight_explain_prompt | llm.with_structured_output(
+    HighlightExplain
+)
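
One detail worth flagging: because `HighlightExplain` is a `TypedDict` rather than a pydantic `BaseModel`, `with_structured_output` parses the model output into a plain dict, which is why `func.py` reads `response["explanation"]` by key while the chatbot graphs below read attributes such as `.datasource` from their `BaseModel` schemas. A minimal sketch, assuming a configured Google API key:

```python
from src.agents.highlight_explain_agent.prompt import HighlightExplain
from src.config.llm import llm_2_0 as llm

structured_llm = llm.with_structured_output(HighlightExplain)  # TypedDict schema
response = structured_llm.invoke("Explain the term 'overfitting' briefly.")
print(response["explanation"])  # dict access; a BaseModel schema would use .explanation
```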

src/agents/lesson_rag_agent/flow.py
ADDED
@@ -0,0 +1,18 @@
+from langgraph.graph import StateGraph, START, END
+from src.config.llm import llm_2_0
+from .func import State
+
+
+class PrimaryChatBot:
+    def __init__(self):
+        self.builder = StateGraph(State)
+
+    @staticmethod
+    def routing(state: State):
+        pass
+
+    def node(self):
+        pass
+
+    def edge(self):
+        pass

src/agents/lesson_rag_agent/func.py
ADDED
@@ -0,0 +1,4 @@
+from typing import TypedDict
+
+class State(TypedDict):
+    pass

src/agents/primary_chatbot/__pycache__/flow.cpython-311.pyc
ADDED
Binary file (8.08 kB)

src/agents/primary_chatbot/__pycache__/func.cpython-311.pyc
ADDED
Binary file (8.73 kB)

src/agents/primary_chatbot/__pycache__/prompt.cpython-311.pyc
ADDED
Binary file (7.86 kB)

src/agents/primary_chatbot/flow.py
ADDED
@@ -0,0 +1,157 @@
+from langgraph.graph import StateGraph, START, END
+from langgraph.graph.state import CompiledStateGraph
+from .func import (
+    StateRAGAccuracy,
+    StateRAGSpeed,
+    trim_history,
+    route,
+    transform_query,
+    retrieve_document,
+    grade_document,
+    generate_answer_rag,
+    grade_hallucinations,
+    gen_answer_normal,
+)
+
+
+class PrimaryChatBotAccuracy:
+    def __init__(self):
+        self.builder = StateGraph(StateRAGAccuracy)
+
+    @staticmethod
+    def routing_after_route(state: StateRAGAccuracy):
+        if state["route_response"] == "vectorstore":
+            return "transform_query"
+        else:
+            return "generate_answer_normal"
+
+    @staticmethod
+    def routing_after_retrieve_document(state: StateRAGAccuracy):
+        return (
+            "grade_document"
+            if len(state["documents"]) != 0
+            else "generate_answer_normal"
+        )
+
+    @staticmethod
+    def route_after_grade_document(state: StateRAGAccuracy):
+        return (
+            "generate_answer_rag"
+            if len(state["documents"]) != 0
+            else "generate_answer_normal"
+        )
+
+    @staticmethod
+    def routing_check_pass_grade_hallucinations(state: StateRAGAccuracy):
+        return END if state["grade_response"] == "yes" else "generate_answer_normal"
+
+    def node(self):
+        self.builder.add_node("trim_history", trim_history)
+        self.builder.add_node("route", route)
+        self.builder.add_node("transform_query", transform_query)
+        self.builder.add_node("retrieve_document", retrieve_document)
+        self.builder.add_node("grade_document", grade_document)
+        self.builder.add_node("generate_answer_rag", generate_answer_rag)
+        self.builder.add_node("grade_hallucinations", grade_hallucinations)
+        self.builder.add_node("generate_answer_normal", gen_answer_normal)
+
+    def edge(self):
+        self.builder.add_edge(START, "trim_history")
+        self.builder.add_edge("trim_history", "route")
+        self.builder.add_conditional_edges(
+            "route",
+            self.routing_after_route,
+            {
+                "transform_query": "transform_query",
+                "generate_answer_normal": "generate_answer_normal",
+            },
+        )
+        self.builder.add_edge("transform_query", "retrieve_document")
+        self.builder.add_conditional_edges(
+            "retrieve_document",
+            self.routing_after_retrieve_document,
+            {
+                "grade_document": "grade_document",
+                "generate_answer_normal": "generate_answer_normal",
+            },
+        )
+        self.builder.add_conditional_edges(
+            "grade_document",
+            self.route_after_grade_document,
+            {
+                "generate_answer_rag": "generate_answer_rag",
+                "generate_answer_normal": "generate_answer_normal",
+            },
+        )
+        self.builder.add_edge("generate_answer_rag", "grade_hallucinations")
+        self.builder.add_conditional_edges(
+            "grade_hallucinations",
+            self.routing_check_pass_grade_hallucinations,
+            {
+                END: END,
+                "generate_answer_normal": "generate_answer_normal",
+            },
+        )
+        self.builder.add_edge("generate_answer_normal", END)
+
+    def __call__(self) -> CompiledStateGraph:
+        self.node()
+        self.edge()
+        return self.builder.compile()
+
+
+class PrimaryChatBotSpeed:
+    def __init__(self):
+        self.builder = StateGraph(StateRAGSpeed)
+
+    @staticmethod
+    def routing_after_retrieve_document(state: StateRAGSpeed):
+        return (
+            "generate_answer_rag"
+            if len(state["documents"]) != 0
+            else "generate_answer_normal"
+        )
+
+    @staticmethod
+    def routing_after_gen_answer_rag(state: StateRAGSpeed):
+        return END if state["document_id_selected"] else "generate_answer_normal"
+
+    def node(self):
+        self.builder.add_node("trim_history", trim_history)
+        self.builder.add_node("transform_query", transform_query)
+        self.builder.add_node("retrieve_document", retrieve_document)
+        self.builder.add_node("generate_answer_rag", generate_answer_rag)
+        self.builder.add_node("generate_answer_normal", gen_answer_normal)
+
+    def edge(self):
+        self.builder.add_edge(START, "trim_history")
+        self.builder.add_edge("trim_history", "transform_query")
+        self.builder.add_edge("transform_query", "retrieve_document")
+        self.builder.add_conditional_edges(
+            "retrieve_document",
+            self.routing_after_retrieve_document,
+            {
+                "generate_answer_rag": "generate_answer_rag",
+                "generate_answer_normal": "generate_answer_normal",
+            },
+        )
+        self.builder.add_conditional_edges(
+            "generate_answer_rag",
+            self.routing_after_gen_answer_rag,
+            {
+                END: END,
+                "generate_answer_normal": "generate_answer_normal",
+            },
+        )
+        self.builder.add_edge("generate_answer_normal", END)
+
+    def __call__(self) -> CompiledStateGraph:
+        self.node()
+        self.edge()
+        return self.builder.compile()
+
+
+rag_speed = PrimaryChatBotSpeed()()
+rag_accuracy = PrimaryChatBotAccuracy()()
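
A sketch of driving the speed graph end to end (the accuracy graph takes the same keys); it assumes the Pinecone and Google credentials are configured, since the nodes hit both services:

```python
import asyncio

from src.agents.primary_chatbot.flow import rag_speed

state = asyncio.run(
    rag_speed.ainvoke(
        {
            "user_query": "Hệ thống có những tính năng gì",
            "messages_history": [],
            "language": "Vietnamese",
        }
    )
)
print(state["llm_response"])
```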

src/agents/primary_chatbot/func.py
ADDED
@@ -0,0 +1,169 @@
+import os
+from typing import TypedDict, Optional, List, Literal
+from langchain_core.documents import Document
+from src.utils.helper import (
+    fake_token_counter,
+    convert_list_context_source_to_str,
+    convert_message,
+)
+from src.utils.logger import logger
+from langchain_core.messages import trim_messages, AnyMessage
+from src.config.vector_store import vector_store_chatbot, vector_store_tutor
+from .prompt import (
+    RouteQuery,
+    route_chain,
+    transform_query_chain,
+    ExtractFilter,
+    extract_filter_chain,
+    GradeDocuments,
+    GenerateAnswer,
+    GradeHallucinations,
+    gen_normal_answer_chain,
+    gen_answer_rag_chain,
+    grade_documents_chain,
+    grade_hallucinations_chain,
+)
+
+
+class StateRAGAccuracy(TypedDict):
+    user_query: str | AnyMessage
+    route_response: str
+    messages_history: list
+    documents: list[Document]
+    filter: dict
+    llm_response: AnyMessage
+    grade_response: Literal["yes", "no"]
+    language: str
+    document_id_selected: Optional[List]
+
+
+class StateRAGSpeed(TypedDict):
+    user_query: str | AnyMessage
+    messages_history: list
+    documents: list[Document]
+    filter: dict
+    llm_response: AnyMessage
+    language: str
+    document_id_selected: Optional[List]
+
+
+def trim_history(state: StateRAGAccuracy | StateRAGSpeed):
+    history = (
+        convert_message(state["messages_history"])
+        if state.get("messages_history")
+        else None
+    )
+
+    if not history:
+        return {"messages_history": []}
+
+    chat_message_history = trim_messages(
+        history,
+        strategy="last",
+        token_counter=fake_token_counter,
+        max_tokens=int(os.getenv("HISTORY_TOKEN_LIMIT", 2000)),
+        start_on="human",
+        end_on="ai",
+        include_system=False,
+        allow_partial=False,
+    )
+    return {"messages_history": chat_message_history}
+
+
+async def route(state: StateRAGAccuracy):
+    logger.info("routing")
+    question = state["user_query"]
+    chat_history = state.get("messages_history", None)
+
+    route_response: RouteQuery = await route_chain.ainvoke(
+        {"question": question, "history": chat_history}
+    )
+    logger.info(f"Route response: {route_response.datasource}")
+    return {"route_response": route_response.datasource}
+
+
+async def transform_query(state: StateRAGAccuracy | StateRAGSpeed):
+    question = state["user_query"]
+    chat_history = state.get("messages_history", None)
+    transform_response = await transform_query_chain.ainvoke(
+        {"question": question, "history": chat_history}
+    )
+    logger.info(f"Transform response: {transform_response.content}")
+    return {"user_query": transform_response.content}
+
+
+async def retrieve_document(state: StateRAGAccuracy):
+    question = state["user_query"]
+    filter = state.get("filter", {})
+    if filter:
+        retriever = vector_store_tutor.as_retriever(
+            search_type="similarity_score_threshold",
+            search_kwargs={"k": 3, "score_threshold": 0.3},
+        )
+    else:
+        retriever = vector_store_chatbot.as_retriever(
+            search_type="similarity_score_threshold",
+            search_kwargs={"k": 3, "score_threshold": 0.0},
+        )
+    documents = retriever.invoke(question, filter=filter)
+    show_doc = " \n =============\n".join([doc.page_content for doc in documents])
+    logger.info(f"Retrieved documents: {show_doc}")
+    return {"documents": documents}
+
+
+async def grade_document(state: StateRAGAccuracy):
+    question = state["user_query"]
+    documents = state["documents"]
+    inputs_batch = [
+        {"question": question, "document": doc.page_content} for doc in documents
+    ]
+    grade_document_response: list[GradeDocuments] = await grade_documents_chain.abatch(
+        inputs_batch
+    )
+    logger.info(f"Grade response: {grade_document_response}")
+    document_index = [
+        index
+        for index, doc in enumerate(grade_document_response)
+        if doc.binary_score == "yes"
+    ]
+    filtered_documents = [documents[i] for i in document_index]
+
+    return {"documents": filtered_documents}
+
+
+async def generate_answer_rag(state: StateRAGAccuracy):
+    question = state["user_query"]
+    documents = state["documents"]
+    language = state["language"]
+    context_str = convert_list_context_source_to_str(documents)
+
+    gen_answer_response: GenerateAnswer = await gen_answer_rag_chain.ainvoke(
+        {"question": question, "context": context_str, "language": language}
+    )
+    logger.info(f"Generate answer response: {gen_answer_response}")
+    id_selected = gen_answer_response.selected_document_index
+    return {
+        "llm_response": gen_answer_response.answer,
+        "document_id_selected": id_selected,
+    }
+
+
+async def grade_hallucinations(state: StateRAGAccuracy):
+    question = state["user_query"]
+    llm_response = state["llm_response"]
+    grade_response: GradeHallucinations = await grade_hallucinations_chain.ainvoke(
+        {"question": question, "generation": llm_response}
+    )
+    return {"grade_response": grade_response.binary_score}
+
+
+async def gen_answer_normal(state: StateRAGAccuracy):
+    question = state["user_query"]
+    history = state["messages_history"]
+    gen_answer_response = await gen_normal_answer_chain.ainvoke(
+        {"question": question, "history": history}
+    )
+    final_response = gen_answer_response.content + "\nNguồn thông tin: Kiến thức của AI"
+    return {"llm_response": final_response}
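
The trimming step above is easiest to see in isolation. A minimal, offline sketch using the same helpers — note that `fake_token_counter` counts whitespace-separated words, not real tokens:

```python
from langchain_core.messages import AIMessage, HumanMessage, trim_messages

from src.utils.helper import fake_token_counter

history = [
    HumanMessage(content="Bạn là ai vậy"),
    AIMessage(content="Tôi là AI hỗ trợ cho hệ thống LearnMigo"),
]
trimmed = trim_messages(
    history,
    strategy="last",  # keep the most recent messages
    token_counter=fake_token_counter,
    max_tokens=2000,  # the default when HISTORY_TOKEN_LIMIT is unset
    start_on="human",
    end_on="ai",
)
print(trimmed)  # both messages survive: 13 "tokens" is well under the limit
```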

src/agents/primary_chatbot/prompt.py
ADDED
@@ -0,0 +1,177 @@
+from pydantic import BaseModel, Field
+from langchain_core.prompts import ChatPromptTemplate
+from typing import Literal
+from src.config.llm import llm_2_0 as llm
+from typing import Optional
+
+
+class RouteQuery(BaseModel):
+    """Route a user query to the most relevant datasource."""
+
+    datasource: Literal["vectorstore", "casual_convo"] = Field(
+        ...,
+        description="Given a user question, choose to route it to casual_convo or a vectorstore.",
+    )
+
+
+class ExtractFilter(BaseModel):
+    """Extract job level and job title from the user question."""
+
+    job_level: str = Field(description="The level of the job the user is asking about.")
+    job_title: str = Field(description="The title of the job the user is asking about.")
+
+
+class GradeDocuments(BaseModel):
+    """Binary score for relevance check on retrieved documents."""
+
+    binary_score: str = Field(
+        description="Documents are relevant to the question, 'yes' or 'no'"
+    )
+
+
+class GenerateAnswer(BaseModel):
+    """Generate an answer based on the provided documents."""
+
+    answer: str = Field(description="Generated answer based on the provided documents.")
+    selected_document_index: Optional[list[int]] = Field(
+        description="Index of the selected document. If there is no relevant document, leave it None."
+    )
+
+
+class GradeHallucinations(BaseModel):
+    """Binary score for grounding of the generated answer in the provided facts."""
+
+    binary_score: Literal["yes", "no"] = Field(
+        description="Whether the answer is grounded in the provided facts. 'yes' if the answer is supported by the facts, 'no' if the answer contains information not present in or contradicting the given facts."
+    )
+
+
+route_prompt = ChatPromptTemplate(
+    [
+        (
+            "system",
+            """You are an expert at routing the user's question to vectorstore or casual_convo in the {topic} platform.
+            Choose vectorstore if the question is related to {topic} and casual_convo otherwise.
+
+            Example:
+            user: Hi, how are you [this is a random question not related to {topic}, so route to casual_convo] : casual_convo
+            user: Calculate, ... [this question is related to education / system information, so route to vectorstore] : vectorstore""",
+        ),
+        ("placeholder", "{history}"),
+        ("human", "{question}"),
+    ]
+).partial(topic="education")
+
+re_write_query_prompt = ChatPromptTemplate(
+    [
+        (
+            "system",
+            """You are a question re-writer that converts an input question to a better version that is optimized
+            for vectorstore retrieval, and very concise. Look at the input and try to reason about the underlying semantic intent/meaning. The input can also be a
+            follow-up question; look at the chat history to re-write the question to include the necessary info from the chat history into a better version that is optimized
+            for vectorstore retrieval without any other info needed. The topic of the conversation will generally be around the {topic} topic. You need to re-write the query based on the history and include keywords related to this topic.""",
+        ),
+        ("placeholder", "{history}"),
+        (
+            "human",
+            "{question}",
+        ),
+    ]
+).partial(topic="education")
+
+extract_filter_prompt = ChatPromptTemplate.from_messages(
+    [
+        (
+            "system",
+            """You are an expert at extracting metadata from the user's question about the {topic} topic and using it to filter the retrieved documents.
+            """,
+        ),
+        ("placeholder", "{history}"),
+        ("human", "{question}"),
+    ]
+).partial(topic="education")
+
+check_relevant_document_prompt = ChatPromptTemplate(
+    [
+        (
+            "system",
+            """
+            You are a grader assessing the relevance of a retrieved document to a user question.
+            If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant.
+            It does not need to be a stringent test. The goal is to filter out erroneous retrievals.
+            Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question.
+            Then, give a score ranging from 0 to 1, with higher values indicating a stronger match and more corresponding keywords.
+            """,
+        ),
+        (
+            "human",
+            "Retrieved document: \n\n {document} \nvs\n User question: {question}",
+        ),
+    ]
+)
+
+gen_answer_rag_prompt = ChatPromptTemplate(
+    [
+        (
+            "system",
+            """You are a chatbot related to {topic}. You are asked to generate an answer based on the provided documents.
+            You are given context related to the job description of a job position. If the context is not provided, just say 'không có tài liệu liên quan'.
+            Answer in {language} language.
+
+            Context:
+            ```
+            {context}
+            ```
+
+            """,
+        ),
+        (
+            "human",
+            """
+            Question: {question}
+            """,
+        ),
+    ]
+).partial(topic="education", language="vietnamese")
+
+
+grade_answer_prompt = ChatPromptTemplate(
+    [
+        (
+            "system",
+            """You are a grader assessing whether an answer addresses / resolves a question.
+            Give a binary score 'yes' or 'no'. 'Yes' means that the answer resolves the question.
+            If the LLM generation says that it doesn't know, is not sure, or asks to keep the questions relevant to the topic, grade it as 'yes'.""",
+        ),
+        (
+            "human",
+            "If the LLM generation says that it doesn't know, is not sure, or asks to keep the questions relevant to the topic, grade it as 'yes'. User question: \n\n {question} \n\n LLM generation: {generation}",
+        ),
+    ]
+)
+gen_normal_answer_prompt = ChatPromptTemplate(
+    [
+        (
+            "system",
+            """Bạn là chatbot giải đáp câu hỏi của người dùng dựa trên đoạn hội thoại liên quan đến lĩnh vực giáo dục
+            """,
+        ),
+        ("placeholder", "{history}"),
+        ("human", "{question}"),
+    ]
+)
+
+
+route_chain = route_prompt | llm.with_structured_output(RouteQuery)
+transform_query_chain = re_write_query_prompt | llm
+extract_filter_chain = extract_filter_prompt | llm.with_structured_output(ExtractFilter)
+grade_documents_chain = check_relevant_document_prompt | llm.with_structured_output(
+    GradeDocuments
+)
+gen_answer_rag_chain = gen_answer_rag_prompt | llm.with_structured_output(
+    GenerateAnswer
+)
+gen_normal_answer_chain = gen_normal_answer_prompt | llm
+grade_hallucinations_chain = grade_answer_prompt | llm.with_structured_output(
+    GradeHallucinations
+)
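
A routing sketch, assuming a configured Google API key; the structured output is a `RouteQuery` instance, so callers read `.datasource` as an attribute:

```python
import asyncio

from src.agents.primary_chatbot.prompt import RouteQuery, route_chain

async def demo() -> None:
    result: RouteQuery = await route_chain.ainvoke(
        {"question": "Hệ thống có những tính năng gì", "history": []}
    )
    print(result.datasource)  # "vectorstore" or "casual_convo"

asyncio.run(demo())
```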

src/apis/__pycache__/create_app.cpython-311.pyc
ADDED
Binary file (980 Bytes)

src/apis/create_app.py
ADDED
@@ -0,0 +1,23 @@
+from fastapi import FastAPI, APIRouter
+from fastapi.middleware.cors import CORSMiddleware
+from src.apis.routers.chat_router import router as router_chat
+
+api_router = APIRouter()
+api_router.include_router(router_chat)
+
+
+def create_app():
+    app = FastAPI(
+        docs_url="/",
+        title="AI Service",
+    )
+
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+
+    return app
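
A minimal sketch exercising the factory with FastAPI's TestClient (which needs `httpx` installed). Note that importing `create_app` also pulls in the chat router and, transitively, the vector stores, so the environment keys still have to be present:

```python
from fastapi.testclient import TestClient

from src.apis.create_app import api_router, create_app

app = create_app()
app.include_router(api_router)
client = TestClient(app)
print(client.get("/").status_code)  # 200: the interactive docs live at "/"
```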

src/apis/interfaces/__pycache__/chat_interface.cpython-311.pyc
ADDED
Binary file (2.61 kB)

src/apis/interfaces/chat_interface.py
ADDED
@@ -0,0 +1,44 @@
+from typing import Optional
+from pydantic import BaseModel, Field
+
+
+class ChatBody(BaseModel):
+    query: str = Field(..., title="User's query messages")
+    history: Optional[list] = Field(None, title="Chat history")
+    language: Optional[str] = Field("en", title="Language")
+    topic: Optional[str] = Field("education", title="Topic")
+
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "query": "Hệ thống có những tính năng gì",
+                "history": [
+                    {"content": "Bạn là ai vậy", "type": "human"},
+                    {
+                        "content": "Tôi là AI hỗ trợ cho hệ thống LearnMigo",
+                        "type": "ai",
+                    },
+                ],
+                "language": "Vietnamese",
+            }
+        }
+    }
+
+
+class HighlightExplainBody(BaseModel):
+    domain: str = Field(..., title="Domain")
+    question: str = Field(..., title="User's query messages")
+    highlight_terms: str = Field(..., title="Highlight terms")
+    adjacent_paragraphs: str = Field(..., title="Adjacent paragraphs")
+    language: str = Field("Vietnamese", title="Language")
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "language": "Vietnamese",
+                "domain": "Machine Learning",
+                "question": "What does overfitting mean and why is it a problem?",
+                "highlight_terms": "overfitting",
+                "adjacent_paragraphs": "Overfitting happens when a machine learning model performs well on the training data but poorly on unseen data. This is because the model has learned not just the underlying patterns but also the noise in the training dataset. In contrast, a well-generalized model captures patterns that apply to new data as well.",
+            }
+        }
+    }

src/apis/routers/__pycache__/chat_router.cpython-311.pyc
ADDED
Binary file (2.76 kB)

src/apis/routers/chat_router.py
ADDED
@@ -0,0 +1,49 @@
+from fastapi import APIRouter, status, Depends
+from fastapi.responses import JSONResponse
+from typing import Annotated
+from src.apis.interfaces.chat_interface import ChatBody, HighlightExplainBody
+from src.agents.primary_chatbot.flow import rag_accuracy, rag_speed
+from src.agents.highlight_explain_agent.flow import highlight_workflow
+
+router = APIRouter(prefix="/ai", tags=["AI"])
+
+
+@router.post("/rag_accuracy")
+async def primary_chat_accuracy(body: ChatBody):
+    response = await rag_accuracy.ainvoke(
+        {
+            "user_query": body.query,
+            "messages_history": body.history,
+            "language": body.language,
+        }
+    )
+    final_response = response["llm_response"]
+    return JSONResponse(status_code=status.HTTP_200_OK, content=final_response)
+
+
+@router.post("/rag_speed")
+async def primary_chat_speed(body: ChatBody):
+    response = await rag_speed.ainvoke(
+        {
+            "user_query": body.query,
+            "messages_history": body.history,
+            "language": body.language,
+        }
+    )
+    final_response = response["llm_response"]
+    return JSONResponse(status_code=status.HTTP_200_OK, content=final_response)
+
+
+@router.post("/highlight_explain")
+async def highlight_explain(body: HighlightExplainBody):
+    response = await highlight_workflow.ainvoke(
+        {
+            "domain": body.domain,
+            "question": body.question,
+            "highlight_terms": body.highlight_terms,
+            "adjacent_paragraphs": body.adjacent_paragraphs,
+            "language": body.language,
+        }
+    )
+    final_response = response["explanation"]
+    return JSONResponse(status_code=status.HTTP_200_OK, content=final_response)
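
A hypothetical request against a locally running instance; the payload mirrors the `ChatBody` example embedded in the schema above:

```python
import json
from urllib.request import Request, urlopen

payload = {
    "query": "Hệ thống có những tính năng gì",
    "history": [
        {"content": "Bạn là ai vậy", "type": "human"},
        {"content": "Tôi là AI hỗ trợ cho hệ thống LearnMigo", "type": "ai"},
    ],
    "language": "Vietnamese",
}
req = Request(
    "http://localhost:3002/ai/rag_speed",
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
)
with urlopen(req) as resp:
    print(json.load(resp))  # the llm_response string produced by the graph
```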

src/config/__pycache__/llm.cpython-311.pyc
ADDED
Binary file (674 Bytes)

src/config/__pycache__/prompt.cpython-311.pyc
ADDED
Binary file (8.98 kB)

src/config/__pycache__/vector_store.cpython-311.pyc
ADDED
Binary file (868 Bytes)

src/config/constant.py
ADDED
File without changes (empty file)

src/config/llm.py
ADDED
@@ -0,0 +1,14 @@
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
+
+llm_2_0 = ChatGoogleGenerativeAI(
+    model="gemini-2.0-flash",
+    temperature=0.1,
+    max_retries=2,
+)
+llm_1_5 = ChatGoogleGenerativeAI(
+    model="gemini-1.5-flash",
+    temperature=0.1,
+    max_retries=2,
+)
+embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
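
A sanity-check sketch for the clients above, assuming GOOGLE_API_KEY is exported (app.py loads it from `.env` before anything here is used):

```python
from src.config.llm import embeddings, llm_2_0

print(llm_2_0.invoke("Say hello in Vietnamese.").content)
print(len(embeddings.embed_query("overfitting")))  # embedding dimensionality
```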

src/config/prompt.py
ADDED
@@ -0,0 +1,206 @@
+from pydantic import BaseModel, Field
+from langchain_core.prompts import ChatPromptTemplate
+from typing import Literal
+from src.config.llm import llm_2_0 as llm
+from typing import Optional
+
+
+class RouteQuery(BaseModel):
+    """Route a user query to the most relevant datasource."""
+
+    datasource: Literal["vectorstore", "casual_convo"] = Field(
+        ...,
+        description="Given a user question, choose to route it to casual_convo or a vectorstore.",
+    )
+
+
+class ExtractFilter(BaseModel):
+    """Extract job level and job title from the user question."""
+
+    job_level: str = Field(description="The level of the job the user is asking about.")
+    job_title: str = Field(description="The title of the job the user is asking about.")
+
+
+class GradeDocuments(BaseModel):
+    """Binary score for relevance check on retrieved documents."""
+
+    binary_score: str = Field(
+        description="Documents are relevant to the question, 'yes' or 'no'"
+    )
+
+
+class GenerateAnswer(BaseModel):
+    """Generate an answer based on the provided documents."""
+
+    answer: str = Field(description="Generated answer based on the provided documents.")
+    selected_document_index: Optional[list[int]] = Field(
+        description="Index of the selected document. If there is no relevant document, leave it None."
+    )
+
+
+class GradeHallucinations(BaseModel):
+    """Binary score for grounding of the generated answer in the provided facts."""
+
+    binary_score: Literal["yes", "no"] = Field(
+        description="Whether the answer is grounded in the provided facts. 'yes' if the answer is supported by the facts, 'no' if the answer contains information not present in or contradicting the given facts."
+    )
+
+
+class HighlightExplain(BaseModel):
+    """Explain the highlight terms in a concise and easy to understand manner."""
+
+    explanation: str = Field(description="Explanation of the highlight terms.")
+
+
+route_prompt = ChatPromptTemplate(
+    [
+        (
+            "system",
+            """You are an expert at routing the user's question to vectorstore or casual_convo in the {topic} platform.
+            Choose vectorstore if the question is related to {topic} and casual_convo otherwise.
+
+            Example:
+            user: Hi, how are you [this is a random question not related to {topic}, so route to casual_convo] : casual_convo
+            user: Calculate, ... [this question is related to education / system information, so route to vectorstore] : vectorstore""",
+        ),
+        ("placeholder", "{history}"),
+        ("human", "{question}"),
+    ]
+).partial(topic="education")
+
+re_write_query_prompt = ChatPromptTemplate(
+    [
+        (
+            "system",
+            """You are a question re-writer that converts an input question to a better version that is optimized
+            for vectorstore retrieval, and very concise. Look at the input and try to reason about the underlying semantic intent/meaning. The input can also be a
+            follow-up question; look at the chat history to re-write the question to include the necessary info from the chat history into a better version that is optimized
+            for vectorstore retrieval without any other info needed. The topic of the conversation will generally be around the {topic} topic. You need to re-write the query based on the history and include keywords related to this topic.""",
+        ),
+        ("placeholder", "{history}"),
+        (
+            "human",
+            "{question}",
+        ),
+    ]
+).partial(topic="education")
+
+extract_filter_prompt = ChatPromptTemplate.from_messages(
+    [
+        (
+            "system",
+            """You are an expert at extracting metadata from the user's question about the {topic} topic and using it to filter the retrieved documents.
+            """,
+        ),
+        ("placeholder", "{history}"),
+        ("human", "{question}"),
+    ]
+).partial(topic="education")
+
+check_relevant_document_prompt = ChatPromptTemplate(
+    [
+        (
+            "system",
+            """
+            You are a grader assessing the relevance of a retrieved document to a user question.
+            If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant.
+            It does not need to be a stringent test. The goal is to filter out erroneous retrievals.
+            Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question.
+            Then, give a score ranging from 0 to 1, with higher values indicating a stronger match and more corresponding keywords.
+            """,
+        ),
+        (
+            "human",
+            "Retrieved document: \n\n {document} \nvs\n User question: {question}",
+        ),
+    ]
+)
+
+gen_answer_rag_prompt = ChatPromptTemplate(
+    [
+        (
+            "system",
+            """You are a chatbot related to {topic}. You are asked to generate an answer based on the provided documents.
+            You are given context related to the job description of a job position. If the context is not provided, just say 'không có tài liệu liên quan'.
+            Answer in {language} language.
+
+            Context:
+            ```
+            {context}
+            ```
+
+            """,
+        ),
+        (
+            "human",
+            """
+            Question: {question}
+            """,
+        ),
+    ]
+).partial(topic="education", language="vietnamese")
+
+
+grade_answer_prompt = ChatPromptTemplate(
+    [
+        (
+            "system",
+            """You are a grader assessing whether an answer addresses / resolves a question.
+            Give a binary score 'yes' or 'no'. 'Yes' means that the answer resolves the question.
+            If the LLM generation says that it doesn't know, is not sure, or asks to keep the questions relevant to the topic, grade it as 'yes'.""",
+        ),
+        (
+            "human",
+            "If the LLM generation says that it doesn't know, is not sure, or asks to keep the questions relevant to the topic, grade it as 'yes'. User question: \n\n {question} \n\n LLM generation: {generation}",
+        ),
+    ]
+)
+gen_normal_answer_prompt = ChatPromptTemplate(
+    [
+        (
+            "system",
+            """Bạn là chatbot giải đáp câu hỏi của người dùng dựa trên đoạn hội thoại liên quan đến lĩnh vực giáo dục
+            """,
+        ),
+        ("placeholder", "{history}"),
+        ("human", "{question}"),
+    ]
+)
+
+highlight_explain_prompt = ChatPromptTemplate(
+    [
+        (
+            "system",
+            """You are an expert in explaining the highlight terms in the {domain} domain.
+            You are given the highlight terms and the adjacent paragraphs of the highlight terms.
+            Your task is to explain the highlight terms in a concise and easy to understand manner.
+            You are also given the user question.
+            """,
+        ),
+        (
+            "human",
+            """
+            User question: {question}
+            Highlight terms: {highlight_terms}
+            Adjacent paragraphs: {adjacent_paragraphs}
+            """,
+        ),
+    ]
+)
+
+route_chain = route_prompt | llm.with_structured_output(RouteQuery)
+transform_query_chain = re_write_query_prompt | llm
+extract_filter_chain = extract_filter_prompt | llm.with_structured_output(ExtractFilter)
+grade_documents_chain = check_relevant_document_prompt | llm.with_structured_output(
+    GradeDocuments
+)
+gen_answer_rag_chain = gen_answer_rag_prompt | llm.with_structured_output(
+    GenerateAnswer
+)
+gen_normal_answer_chain = gen_normal_answer_prompt | llm
+grade_hallucinations_chain = grade_answer_prompt | llm.with_structured_output(
+    GradeHallucinations
+)
+highlight_explain_chain = highlight_explain_prompt | llm.with_structured_output(
+    HighlightExplain
+)

src/config/vector_store.py
ADDED
@@ -0,0 +1,37 @@
+from langchain_mongodb import MongoDBAtlasVectorSearch
+from pymongo import MongoClient
+from .llm import embeddings
+import os
+from langchain_pinecone import PineconeVectorStore
+
+# client = MongoClient(os.getenv("MONGO_CONNECTION_STR"))
+
+# DB_NAME = os.getenv("DB_NAME")
+# COLLECTION_NAME = os.getenv("COLLECTION_NAME")
+# ATLAS_VECTOR_CHATBOT_INDEX_NAME = os.getenv("ATLAS_VECTOR_CHATBOT_INDEX_NAME")
+# ATLAS_VECTOR_TUTOR_INDEX_NAME = os.getenv("ATLAS_VECTOR_TUTOR_INDEX_NAME")
+
+# MONGODB_COLLECTION_CHATBOT = client[DB_NAME][ATLAS_VECTOR_CHATBOT_INDEX_NAME]
+# MONGODB_COLLECTION_TUTOR = client[DB_NAME][ATLAS_VECTOR_TUTOR_INDEX_NAME]
+
+# vector_store_chatbot = MongoDBAtlasVectorSearch(
+#     collection=MONGODB_COLLECTION_CHATBOT,
+#     embedding=embeddings,
+#     index_name=ATLAS_VECTOR_CHATBOT_INDEX_NAME,
+#     relevance_score_fn="cosine",
+# )
+# vector_store_tutor = MongoDBAtlasVectorSearch(
+#     collection=MONGODB_COLLECTION_TUTOR,
+#     embedding=embeddings,
+#     index_name=ATLAS_VECTOR_TUTOR_INDEX_NAME,
+#     relevance_score_fn="cosine",
+# )
+API_PINCONE_KEY = os.getenv("PINECONE_API_KEY")
+index_tutor = "tutor-vector-store"
+index_chatbot = "chatbot-vector-store"
+vector_store_tutor = PineconeVectorStore(
+    index_name=index_tutor, embedding=embeddings, pinecone_api_key=API_PINCONE_KEY
+)
+vector_store_chatbot = PineconeVectorStore(
+    index_name=index_chatbot, embedding=embeddings, pinecone_api_key=API_PINCONE_KEY
+)
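
A retrieval sketch against the Pinecone-backed store, assuming PINECONE_API_KEY and the Google key are set and the `chatbot-vector-store` index is populated; it mirrors the retriever settings used in `primary_chatbot/func.py`:

```python
from src.config.vector_store import vector_store_chatbot

retriever = vector_store_chatbot.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 3, "score_threshold": 0.0},
)
for doc in retriever.invoke("Hệ thống có những tính năng gì"):
    print(doc.page_content[:80])  # first 80 chars of each retrieved chunk
```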

src/utils/__pycache__/helper.cpython-311.pyc
ADDED
Binary file (2.24 kB)

src/utils/__pycache__/logger.cpython-311.pyc
ADDED
Binary file (3.92 kB)

src/utils/helper.py
ADDED
@@ -0,0 +1,27 @@
+from langchain_core.documents import Document
+from typing import Union
+from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
+
+
+def fake_token_counter(messages: Union[list[BaseMessage], BaseMessage]) -> int:
+    if isinstance(messages, list):
+        return sum(len(message.content.split()) for message in messages)
+    return len(messages.content.split())
+
+
+def convert_list_context_source_to_str(contexts: list[Document]):
+    formatted_str = ""
+    for i, context in enumerate(contexts):
+        formatted_str += f"Document index {i}:\nContent: {context.page_content}\n"
+        formatted_str += "----------------------------------------------\n\n"
+    return formatted_str
+
+
+def convert_message(messages):
+    list_message = []
+    for message in messages:
+        if message["type"] == "human":
+            list_message.append(HumanMessage(content=message["content"]))
+        else:
+            list_message.append(AIMessage(content=message["content"]))
+    return list_message
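
A small, offline usage sketch for the helpers above:

```python
from src.utils.helper import convert_message, fake_token_counter

messages = convert_message(
    [
        {"content": "Bạn là ai vậy", "type": "human"},
        {"content": "Tôi là AI hỗ trợ cho hệ thống LearnMigo", "type": "ai"},
    ]
)
print(fake_token_counter(messages))  # 13: a word count stands in for tokens
```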

src/utils/logger.py
ADDED
@@ -0,0 +1,65 @@
+import logging
+import os
+from datetime import datetime
+from pathlib import Path
+
+import pytz
+
+
+class CoreCFG:
+    PROJECT_NAME = "SCHEDULE AI"
+    BOT_NAME = str("SCHEDULE AI")
+
+
+def get_date_time():
+    return datetime.now(pytz.timezone("Asia/Ho_Chi_Minh"))
+
+
+DATE_TIME = get_date_time().date()
+BASE_DIR = os.path.dirname(Path(__file__).parent.parent)
+LOG_DIR = os.path.join(BASE_DIR, "logs")
+
+
+class CustomFormatter(logging.Formatter):
+    green = "\x1b[0;32m"
+    grey = "\x1b[38;5;248m"
+    yellow = "\x1b[38;5;229m"
+    red = "\x1b[31;20m"
+    bold_red = "\x1b[31;1m"
+    blue = "\x1b[38;5;31m"
+    white = "\x1b[38;5;255m"
+    reset = "\x1b[38;5;15m"
+
+    base_format = f"{grey}%(asctime)s | %(name)s | %(threadName)s | {{level_color}}%(levelname)-8s{grey} | {blue}%(module)s:%(lineno)d{grey} - {white}%(message)s"
+
+    FORMATS = {
+        logging.INFO: base_format.format(level_color=green),
+        logging.WARNING: base_format.format(level_color=yellow),
+        logging.ERROR: base_format.format(level_color=red),
+        logging.CRITICAL: base_format.format(level_color=bold_red),
+    }
+
+    def format(self, record):
+        log_fmt = self.FORMATS.get(record.levelno)
+        formatter = logging.Formatter(log_fmt)
+        return formatter.format(record)
+
+
+def custom_logger(app_name="APP"):
+    logger_r = logging.getLogger(name=app_name)
+    # Set the timezone to Ho_Chi_Minh
+    tz = pytz.timezone("Asia/Ho_Chi_Minh")
+
+    logging.Formatter.converter = lambda *args: datetime.now(tz).timetuple()
+
+    ch = logging.StreamHandler()
+    ch.setLevel(logging.INFO)
+    ch.setFormatter(CustomFormatter())
+
+    logger_r.setLevel(logging.INFO)
+    logger_r.addHandler(ch)
+
+    return logger_r
+
+
+logger = custom_logger(app_name=CoreCFG.PROJECT_NAME)