# Import
import urllib.parse

import streamlit as st
from RAG_public import RAG
from congreso import congreso as c
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage, AIMessage

# Host that the "pdf_url" paths found in the dataset records are appended to
_SEARCH_BASE_URL = "https://www.congreso.es"


# Separate page_content and metadata
def get_pagecontent_metadata(data):
    """
    Separates page content and metadata of the given document

    The input dict is left untouched: a cleaned copy is built instead, so
    calling this function twice on the same record does not quote and
    prefix an already absolute "pdf_url" a second time.

    Parameters
    ---------
    data: dict
        Document that has various features such as "id", "mensaje" and "texto"...

    Returns
    -------
    pagecontent_metadata: dict
        Creates key/value pairs for page content and metadata.\n
        "texto" is used for page content (empty string if it is missing or None),
        and the rest of the information is used for metadata.
        None values are replaced by empty strings.
    """

    # Work on a copy and replace None values by empty strings
    cleaned = {key: "" if value is None else value for key, value in data.items()}

    # Build the absolute, percent-quoted URL of the PDF when a path is given.
    # The key is always written because every Document created in this file
    # carries a "pdf_url" metadata entry.
    pdf_path = cleaned.get("pdf_url", "")
    if pdf_path != "":
        cleaned["pdf_url"] = _SEARCH_BASE_URL + urllib.parse.quote(pdf_path)
    else:
        cleaned["pdf_url"] = ""

    # Defines pagecontent and metadata information
    pagecontent_metadata = {
        "metadata": {key: value for key, value in cleaned.items() if key != "texto"},
        "page_content": cleaned.get("texto", ""),
    }

    return pagecontent_metadata


def _read_flattened(path):
    """Returns the content of the text file at `path` with every newline removed."""
    # Explicit encoding so that decoding does not depend on the host locale
    with open(path, encoding="utf-8") as file:
        return file.read().replace("\n", "")


# Load data
def read_data(max_documents=100):
    """
    Returns list of documents after reading each document.
    Uses get_pagecontent_metadata function to separate content from metadata.

    Parameters
    ---------
    max_documents: int
        Maximum number of records of the "XV" term that are converted
        into documents. Defaults to 100.

    Returns
    ----------
    docs: list
        Two Document objects built from the README files, followed by one
        Document per loaded record
    """

    # Reads Readme txt files to get information about Congreso RAG and Dataset
    congreso_rag_readme = _read_flattened("About_CongresoRAG/CongresoRAG-README.txt")
    dataset_readme = _read_flattened("About_CongresoRAG/Dataset-README.txt")

    # Put page_content and metadata of these txt file into Document format
    doc_congreso_rag = Document(
        page_content=congreso_rag_readme,
        metadata={"pdf_url": "https://huggingface.co/spaces/IIIACSIC/CongresoRAG/blob/main/About_CongresoRAG/CongresoRAG-README.txt"},
    )
    doc_dataset = Document(
        page_content=dataset_readme,
        metadata={"pdf_url": "https://zenodo.org/records/11195944"},
    )

    # Creates docs list to store each documents
    docs = [doc_congreso_rag, doc_dataset]

    terms = ["XV"]
    records = c.load_jsons(terms)["XV"]

    # Never read past the end when fewer than `max_documents` records exist.
    # Records are fetched by index, exactly as before, because the container
    # type returned by load_jsons is not visible from this file.
    for i in range(min(max_documents, len(records))):
        pagecontent_metadata = get_pagecontent_metadata(records[i])
        docs.append(
            Document(
                page_content=pagecontent_metadata["page_content"],
                metadata=pagecontent_metadata["metadata"],
            )
        )

    return docs


# UI (User Interface)
def main():
    """
    Sets page configuration and title\n
    Reads documents if it is not read yet\n
    Calls rag model if it is not called yet\n
    Creates chat history if it is not created yet\n
    Replays the stored chat history on the page\n
    Takes user query and connects to the rag model\n
    Get response from the rag model and displays it on the screen\n
    """

    # Set page configuration
    st.set_page_config(page_title="CongresoRAG", page_icon="shark")
    st.title("CongresoRAG")
    st.markdown("Designed by IIIA-CSIC", unsafe_allow_html=True)

    # Read documents, if they are not read yet
    if "documents" not in st.session_state:
        st.session_state.documents = read_data()

    # Calls RAG model if it is not called yet
    if "rag" not in st.session_state:
        st.session_state.rag = RAG(document=st.session_state.documents)
        st.session_state.rag.model()

    # Create chat history to store previous question/answer
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    # Defines user query when it is entered by user
    user_query = st.chat_input("Message CongresoRAG")

    # Displays the stored chat history, human messages as "human" and
    # everything else as "ai"
    for message in st.session_state.chat_history:
        role = "human" if isinstance(message, HumanMessage) else "ai"
        with st.chat_message(role):
            st.markdown(message.content)

    # Takes user query and gets response from the rag model, and store them in chat history.
    # Nothing is done when the query is None or an empty string.
    if user_query:
        st.session_state.chat_history.append(HumanMessage(user_query))

        with st.chat_message("human"):
            st.markdown(user_query)

        with st.chat_message("ai"):
            # The chain returns a text plus an iterable of further strings;
            # they are shown as paragraphs separated by blank lines
            ai_response0, ai_response1 = st.session_state.rag.conversational_rag_chain(user_query)
            ai_response = ai_response0 + "\n\n" + "\n\n".join(ai_response1)
            st.session_state.chat_history.append(AIMessage(ai_response))
            st.markdown(ai_response)


# Calls main function
if __name__ == "__main__":
    main()