import getpass
import os

import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain.schema import StrOutputParser

# OpenAI key is injected through the Streamlit secrets store (configured in
# the Hugging Face Space settings).
os.environ["OPENAI_API_KEY"] = st.secrets['OPENAI']
os.environ["LANGCHAIN_TRACING_V2"] = "true"
# NOTE(review): tracing is enabled but LANGCHAIN_API_KEY is never exported,
# so LangSmith uploads will error -- either set the key or drop the flag.
#os.environ["LANGCHAIN_API_KEY"] = st.secrets['OPENAI']

# Session-state cache: the RAG chain is expensive to build (downloads three
# PDFs and embeds every chunk), so build it once per session and reuse it.
if 'chain' not in st.session_state:
    st.session_state['chain'] = 'dummy'  # sentinel: chain not built yet


def get_data():
    """Return the cached chain (or the 'dummy' sentinel) from session state."""
    return st.session_state["chain"]


def add_data(chain):
    """Store the built chain in session state for later Streamlit reruns."""
    st.session_state["chain"] = chain


def format_docs(docs):
    """Concatenate retrieved document chunks into a single context string."""
    return "\n\n".join(d.page_content for d in docs)


def _build_chain():
    """Build the retrieval-augmented QA chain over the SII circulars.

    Downloads three 2024 circular PDFs from the Chilean tax authority (SII),
    splits them into chunks, embeds the chunks with a multilingual
    sentence-transformers model, indexes them in FAISS, and wires
    retriever + prompt + LLM into an LCEL chain mapping a question string
    to an answer string.
    """
    loaders = [
        PyPDFLoader("https://www.sii.cl/normativa_legislacion/circulares/2024/circu3.pdf"),
        PyPDFLoader("https://www.sii.cl/normativa_legislacion/circulares/2024/circu2.pdf"),
        PyPDFLoader("https://www.sii.cl/normativa_legislacion/circulares/2024/circu1.pdf"),
    ]
    docs = []
    for loader in loaders:
        docs.extend(loader.load())

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000)
    docs = text_splitter.split_documents(docs)

    # Multilingual (Spanish-capable) sentence-similarity model.
    modelPath = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'
    model_kwargs = {'device': 'cpu'}  # or 'cuda'
    encode_kwargs = {'normalize_embeddings': False}

    # Embeddings that map the SII questions into dense multidimensional vectors.
    embeddings = HuggingFaceEmbeddings(
        model_name=modelPath,        # path to the pre-trained model
        model_kwargs=model_kwargs,   # model configuration options
        encode_kwargs=encode_kwargs  # encoding options
    )

    # Vector store + retriever: fetch the 3 most relevant chunks per query
    # (comment previously said "up to 4" -- k is actually 3).
    db = FAISS.from_documents(docs, embeddings)
    retriever = db.as_retriever(search_kwargs={"k": 3})

    # Prompt instructs the model to answer strictly from the retrieved
    # context. The Spanish template text is runtime model input -- unchanged.
    template = """Responde la pregunta basado unicamente en el siguiente contexto
{contexto}

Pregunta: {pregunta}
"""
    prompt = ChatPromptTemplate.from_template(template)
    model = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0)

    # LCEL pipeline: question -> {context, question} -> prompt -> LLM -> str.
    return (
        {"contexto": retriever | format_docs, "pregunta": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )


chain = get_data()
if chain == 'dummy':
    chain = _build_chain()
    add_data(chain)

# --- UI ---
pregunta = st.text_input('Ingresa algun texto:', value="Has un resumen del documento circu3.pdf")
tmp_button = st.button("CLICK")
if tmp_button:  # wait for the button
    out = chain.invoke(pregunta)
    st.write(out)
    #st.rerun()  # restart the app
else:
    st.stop()