# Spaces: Build error
# File size: 5,068 Bytes
# Import
import urllib.parse
import streamlit as st
from RAG_public import RAG
from congreso import congreso as c
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage, AIMessage
# Separate page_content and metadata
def get_pagecontent_metadata(data):
    """
    Separate the page content and the metadata of the given document.

    Parameters
    ----------
    data: dict
        Document that has various features such as "id", "mensaje" and "texto"...
        The dict itself is left unmodified.

    Returns
    -------
    pagecontent_metadata: dict
        Key/value pairs for page content and metadata.
        "texto" is used for page content ("" when it is missing or None), and the
        rest of the information is used for metadata, with None values replaced
        by "". A non-empty "pdf_url" is URL-quoted and prefixed with the
        congreso.es base URL.
    """
    # Work on a cleaned copy instead of the caller's dict: mutating the record
    # in place would prefix and quote "pdf_url" a second time if the same
    # record were ever processed again.
    cleaned = {key: "" if value is None else value for key, value in data.items()}

    search_base_url = "https://www.congreso.es"
    # A record without a "pdf_url" key is treated like one with an empty URL
    # rather than raising KeyError.
    pdf_url = cleaned.get("pdf_url", "")
    if pdf_url != "":
        cleaned["pdf_url"] = search_base_url + urllib.parse.quote(pdf_url)

    # Defines pagecontent and metadata information
    pagecontent_metadata = {
        "metadata": {key: value for key, value in cleaned.items() if key != "texto"},
        "page_content": cleaned.get("texto", ""),
    }
    return pagecontent_metadata
# Load data
def read_data(max_documents=100):
    """
    Return the list of documents handed to the RAG model.

    The list starts with two documents built from the README text files under
    "About_CongresoRAG/", followed by records of the "XV" term loaded through
    ``c.load_jsons``. Each record is split into page content and metadata with
    get_pagecontent_metadata.

    Parameters
    ----------
    max_documents: int
        Upper bound on how many "XV" records are loaded (default 100). When
        fewer records are available, all of them are used.

    Returns
    ----------
    docs: list
        Document objects (langchain_core.documents.Document) inside a docs list
    """
    # Reads Readme txt files to get information about Congreso RAG and Dataset.
    # The encoding is explicit so decoding does not depend on the platform
    # default (assumes the README files are UTF-8 -- TODO confirm).
    with open("About_CongresoRAG/CongresoRAG-README.txt", encoding="utf-8") as file:
        CongresoRAG_readme = file.read().replace("\n", "")
    with open("About_CongresoRAG/Dataset-README.txt", encoding="utf-8") as file:
        Dataset_readme = file.read().replace("\n", "")

    # Put page_content and metadata of these txt files into Document format
    doc_CongresoRAG = Document(page_content=CongresoRAG_readme, metadata={"pdf_url":"https://huggingface.co/spaces/IIIACSIC/CongresoRAG/blob/main/About_CongresoRAG/CongresoRAG-README.txt"})
    doc_Dataset = Document(page_content=Dataset_readme, metadata={"pdf_url":"https://zenodo.org/records/11195944"})

    # Creates docs list to store each document
    docs = [doc_CongresoRAG, doc_Dataset]

    terms = ["XV"]
    t = c.load_jsons(terms)
    # Slicing (instead of indexing 0..99) avoids an IndexError when fewer than
    # max_documents records were loaded; assumes t["XV"] is a list-like
    # sequence, as the original integer indexing suggests.
    for record in t["XV"][:max_documents]:
        pagecontent_metadata = get_pagecontent_metadata(record)
        docs.append(
            Document(
                page_content=pagecontent_metadata["page_content"],
                metadata=pagecontent_metadata["metadata"],
            )
        )
    return docs
# UI (User Interface)
def main():
    """
    Build the CongresoRAG chat page.

    Steps, in order:
    - configure the page and show the title and the credit line
    - on first use in the session, load the documents, create the RAG object
      and call its model(), and start an empty chat history
    - replay the stored chat history
    - when the user submits a query, show it, ask the RAG chain for a response,
      show the response, and store both messages in the chat history
    """
    # Page header
    st.set_page_config(page_title="CongresoRAG", page_icon="shark")
    st.title("CongresoRAG")
    st.markdown("<small><i style='color: grey;'>Designed by IIIA-CSIC</i></small>", unsafe_allow_html=True)

    state = st.session_state

    # One-time initialisation per session
    if "documents" not in state:
        state.documents = read_data()
    if "rag" not in state:
        state.rag = RAG(document=state.documents)
        state.rag.model()
    if "chat_history" not in state:
        state.chat_history = []

    # Input box; holds the submitted text, or nothing when no query was sent
    user_query = st.chat_input("Message CongresoRAG")

    # Replay earlier messages under the matching role
    for past_message in state.chat_history:
        role = "human" if isinstance(past_message, HumanMessage) else "ai"
        with st.chat_message(role):
            st.markdown(past_message.content)

    # Nothing more to do without a new query
    if user_query is None or user_query == "":
        return

    # Record and show the question
    state.chat_history.append(HumanMessage(user_query))
    with st.chat_message("human"):
        st.markdown(user_query)

    # Ask the RAG chain, then record and show the answer
    with st.chat_message("ai"):
        answer_text, answer_extras = state.rag.conversational_rag_chain(user_query)
        ai_response = "\n\n".join([answer_text + "\n\n", *answer_extras]) if False else answer_text + "\n\n" + "\n\n".join(answer_extras)
        state.chat_history.append(AIMessage(ai_response))
        st.markdown(ai_response)
# Entry point: build the page only when this file is run as a script
if __name__ == "__main__":
    main()
|