Spaces:

NotShrirang
/

LoomRAG

Sleeping

feat: add more sources for data

a409078 about 1 year ago

788 Bytes

	import bs4
	import os
	from langchain_text_splitters import CharacterTextSplitter
	import requests
	import streamlit as st
	import sys
	from vectordb import add_image_to_index, add_pdf_to_index, update_vectordb

	# Add this file's directory to the module search path. Note that this runs
	# after the `vectordb` import above, so it cannot influence that import.
	sys.path.append(os.path.dirname(os.path.abspath(__file__)))


	def process_text(text: str, text_embedding_model):
	    """Split *text* into chunks, embed them, and add each one to the text index.
	
	    The text is split on newlines into chunks of up to 1200 characters with
	    a 200-character overlap. All chunks are encoded in a single
	    ``text_embedding_model.encode`` call, then each (chunk, embedding) pair
	    is passed to ``update_vectordb`` with ``index_path="text_index.index"``.
	
	    Args:
	        text: Raw text to index.
	        text_embedding_model: Object exposing ``encode(list_of_str)`` that
	            yields one embedding per input chunk.
	
	    Returns:
	        The value returned by the last ``update_vectordb`` call (presumably
	        the updated index -- confirm against ``vectordb``), or ``None`` when
	        the text produces no chunks.
	    """
	    text_splitter = CharacterTextSplitter(
	        separator="\n",
	        chunk_size=1200,
	        chunk_overlap=200,
	        length_function=len,
	        is_separator_regex=False,
	    )
	    chunks = text_splitter.split_text(text)
	    if not chunks:
	        # Nothing to index. Without this guard the loop below never binds
	        # `index` and the final return raises UnboundLocalError.
	        return None
	    text_embeddings = text_embedding_model.encode(chunks)
	    index = None
	    for chunk, embedding in zip(chunks, text_embeddings):
	        index = update_vectordb(
	            index_path="text_index.index",
	            embedding=embedding,
	            text_content=chunk,
	        )
	    return index