Spaces:

yezdata
/

pydocs-ai-api

Sleeping

cleanup - add local/ move unused preprocess code from api/ to local/

af3ecbd 3 months ago

1.9 kB

	from sqlalchemy import create_engine, text
	import time
	from loguru import logger

	from config import DATABASE_URL, Chunk


	# Build the module-wide SQLAlchemy engine at import time. A missing or
	# unusable DATABASE_URL aborts the import with a ValueError.
	if not DATABASE_URL:
	    logger.error("DB_URL is not set. Database features will be disabled.")
	    raise ValueError("DB_URL is not set. Please set the DB_URL environment variable.")

	try:
	    engine = create_engine(DATABASE_URL)
	except Exception:
	    # The log message reports only the length of the URL, not its content.
	    logger.exception(
	        f"Invalid DB_URL format. Length of DB_URL: {len(DATABASE_URL)}"
	    )
	    raise ValueError("Invalid DB_URL format. Please check your env variables.")


	def get_tables(max_attempts: int = 3) -> list[str]:
	    """Return the names of all tables in the ``public`` schema.

	    The query is retried when it raises, pausing two seconds between
	    attempts (but not after the last one).

	    Args:
	        max_attempts: Maximum number of times the query is executed before
	            giving up.

	    Returns:
	        The ``table_name`` column of every row that
	        ``information_schema.tables`` reports for the ``public`` schema.

	    Raises:
	        RuntimeError: If no attempt succeeded. The error raised by the last
	            attempt, if there was one, is chained as the cause.
	    """
	    sql = text("""
	        SELECT
	            table_name
	        FROM
	            information_schema.tables
	        WHERE
	            table_schema = 'public';
	    """)

	    last_error = None
	    for attempt in range(1, max_attempts + 1):
	        try:
	            with engine.begin() as conn:
	                result = conn.execute(sql).fetchall()
	        except Exception as e:
	            last_error = e
	            logger.warning(
	                f"Attempt {attempt}/{max_attempts}\nError getting tables: {e}"
	            )
	            # Waiting is only useful when another attempt will follow.
	            if attempt < max_attempts:
	                time.sleep(2)
	        else:
	            return [row[0] for row in result]

	    # Chain the last failure so the root cause stays visible in tracebacks.
	    raise RuntimeError(
	        f"Error getting tables after {max_attempts} attempts"
	    ) from last_error


	def search_similar(query_embedding: list, doc_name: str, k: int) -> list[Chunk]:
	    """Return up to ``k`` chunks from a table, ordered by embedding distance.

	    Args:
	        query_embedding: Embedding of the query. Its items are rendered with
	            ``str`` into a ``[v1,v2,...]`` literal that the database casts
	            to ``vector``.
	        doc_name: Name of the table to search. It is interpolated into the
	            SQL text because identifiers cannot be bound as parameters, so
	            it must be a plain ASCII identifier, optionally qualified with
	            a schema (``schema.table``).
	        k: Maximum number of rows to return (bound to ``LIMIT``).

	    Returns:
	        One ``Chunk`` per returned row, in the order produced by the query.

	    Raises:
	        ValueError: If ``doc_name`` is not a safe SQL identifier.
	    """
	    # Reject anything that is not a bare identifier before it reaches the
	    # SQL text; this closes the injection path through the table name.
	    name_parts = doc_name.split(".")
	    if len(name_parts) > 2 or not all(
	        part.isascii() and part.isidentifier() for part in name_parts
	    ):
	        raise ValueError(f"Invalid table name: {doc_name!r}")

	    vec_string = "[" + ",".join(map(str, query_embedding)) + "]"

	    # NOTE(review): `<#>` looks like pgvector's negative-inner-product
	    # operator, so ascending order would rank the best matches first; confirm.
	    sql = text(f"""
	        SELECT
	            main_title,
	            chunk_title,
	            content
	        FROM {doc_name}
	        ORDER BY (embedding <#> CAST(:embedding AS vector)) asc
	        LIMIT :k;
	    """)

	    with engine.begin() as conn:
	        rows = conn.execute(sql, {"embedding": vec_string, "k": k}).fetchall()

	    return [
	        Chunk(main_title=main_title, chunk_title=chunk_title, content=content)
	        for main_title, chunk_title, content in rows
	    ]