Spaces:
Runtime error
Runtime error
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain_community.embeddings import BedrockEmbeddings | |
| from langchain_aws import ChatBedrock | |
| from langchain_community.vectorstores import Chroma | |
# The AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_DEFAULT_REGION
# environment variables must be set in the operating-system shell before running.
def initLLM():
    """Build the Bedrock chat model client.

    Returns:
        A ``ChatBedrock`` instance created with the model id
        ``anthropic.claude-3-sonnet-20240229-v1:0``.
    """
    chat_model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
    return ChatBedrock(model_id=chat_model_id)
def initEmbedder():
    """Build the Bedrock embeddings client.

    Returns:
        A ``BedrockEmbeddings`` instance created with the model id
        ``amazon.titan-embed-text-v1``.
    """
    embedding_model_id = 'amazon.titan-embed-text-v1'
    return BedrockEmbeddings(model_id=embedding_model_id)
def initChromaDB(document_chunks,embbeder):
    """Create a Chroma vector store from the given document chunks.

    Args:
        document_chunks: Documents forwarded to ``Chroma.from_documents``.
        embbeder: Embedding object forwarded as the ``embedding`` argument.

    Returns:
        Whatever ``Chroma.from_documents`` returns when called with
        ``persist_directory='./data'``.
    """
    store_options = {
        "embedding": embbeder,
        "persist_directory": './data',
    }
    return Chroma.from_documents(document_chunks, **store_options)
def embedding(thePathFile,embedder):
    """Load a PDF, split it into chunks and index the chunks in Chroma.

    Args:
        thePathFile: Path of the PDF file handed to ``PyPDFLoader``.
        embedder: Embedding object used to build the vector store. When it
            is ``None`` the PDF is still loaded and split (and the counts
            printed), but nothing is indexed.

    Returns:
        The vector store returned by ``initChromaDB``, or ``None`` when
        ``embedder`` is ``None``.
    """
    # Load the PDF file.
    pages = PyPDFLoader(thePathFile).load()
    print(len(pages))

    # Split on newlines with chunk_size=500 and chunk_overlap=100.
    document_splitter = CharacterTextSplitter(
        separator='\n', chunk_size=500, chunk_overlap=100
    )
    document_chunks = document_splitter.split_documents(pages)
    print(len(document_chunks))
    print(embedder)

    # Guard clause: without an embedder there is no store to build. Returning
    # None explicitly keeps the result well defined instead of relying on an
    # unassigned local.
    if embedder is None:
        return None

    print("Cargando a la base vectorial...")
    vectorDB = initChromaDB(document_chunks, embedder)
    print("Fin carga")
    return vectorDB
# Run the application.
if __name__ == "__main__":
    # Create the Bedrock clients; the chat model is instantiated here but is
    # not used by the rest of this script.
    bedrock_llm = initLLM()
    bedrock_embedder = initEmbedder()

    # Index the PDF and print the resulting vector store object.
    chromaDB = embedding("el principito.pdf", bedrock_embedder)
    print(chromaDB)