"""Gradio chat front-end for a RAG agent over a university website's content.

Pipeline: HuggingFace embeddings -> persisted Chroma vector store ->
LangGraph ReAct agent (Gemini) with a retrieval tool and in-memory
conversation checkpointing, served through gr.ChatInterface.
"""

import os
from typing import List

import gradio as gr
from langchain.chat_models import init_chat_model
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.tools import tool
from langchain_core.documents import Document

# LangSmith tracing configuration. API keys are read from the environment
# (empty-string fallback) so no secrets are hard-coded in the source.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGSMITH_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_API_KEY"] = os.environ.get("LANGCHAIN_API_KEY", "")
os.environ["LANGCHAIN_PROJECT"] = "KlagRAG"
os.environ["GOOGLE_API_KEY"] = os.environ.get("GOOGLE_API_KEY", "")

# Load embeddings and vectorstore (pre-built index persisted in ./chroma_db).
embeddings_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)
vector_store = Chroma(
    persist_directory="./chroma_db",
    embedding_function=embeddings_model,
)


def format_doc_with_source(docs: List[Document]) -> str:
    """Serialize retrieved documents into one prompt-friendly string.

    Each document is rendered as its source link followed by its content;
    documents are joined with a [SEP] marker so the LLM can tell them apart
    and cite the right source.
    """
    documents_separator = "\n\n[SEP]\n\n"
    return documents_separator.join(
        f"SOURCE LINK: {doc.metadata['source']}\nCONTENT: {doc.page_content}"
        for doc in docs
    )


@tool(response_format="content_and_artifact")
def retrieve(query: str):
    """Retrieve information related to a query from University's website."""
    retrieved_docs = vector_store.similarity_search(query, k=3)
    serialized = format_doc_with_source(retrieved_docs)
    # content_and_artifact tools return (content for the LLM, raw artifact).
    return serialized, retrieved_docs


memory = MemorySaver()
llm = init_chat_model("gemini-2.0-flash", model_provider="google_genai")

# Agent executor
system_prompt = (
    "You are an assistant for question-answering tasks in university. "
    "You help future students, current students and university staff to find relevant information. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Keep the answer concise and structured. Always provide a links to the sources you used and refer to. "
    "You can ask user questions to give more specific answers, if needed."
)
agent_executor = create_react_agent(
    llm, [retrieve], checkpointer=memory, prompt=system_prompt
)


def respond(user_input, history=None):
    """Gradio chat handler: forward the user message to the agent.

    gr.ChatInterface calls its handler with (message, history); `history`
    defaults to None so direct single-argument calls keep working.
    Conversation state lives in the LangGraph checkpointer (keyed by
    thread_id), so the Gradio-side history is not replayed into the agent.
    """
    # A checkpointed graph must be invoked with a thread_id; one shared
    # thread keeps a single conversation's memory across turns.
    config = {"configurable": {"thread_id": "gradio-session"}}
    # create_react_agent expects messages-based state ({"messages": [...]}),
    # and the reply is the content of the last message in the result.
    response = agent_executor.invoke(
        {"messages": [{"role": "user", "content": user_input}]},
        config,
    )
    messages = response.get("messages")
    if not messages:
        return "No response."
    return messages[-1].content


demo = gr.ChatInterface(
    respond,
)

if __name__ == "__main__":
    demo.launch()