pydocs-ai-api / src /services /database.py
yezdata's picture
cleanup - add local/ move unused preprocess code from api/ to local/
af3ecbd
from sqlalchemy import create_engine, text
import time
from loguru import logger
from config import DATABASE_URL, Chunk
if DATABASE_URL:
try:
engine = create_engine(DATABASE_URL)
except Exception:
logger.exception(
f"Invalid DB_URL format. Length of DB_URL: {len(DATABASE_URL)}"
)
raise ValueError("Invalid DB_URL format. Please check your env variables.")
else:
logger.error("DB_URL is not set. Database features will be disabled.")
raise ValueError("DB_URL is not set. Please set the DB_URL environment variable.")
def get_tables(max_attempts: int = 3) -> list[str]:
sql = text("""
SELECT
table_name
FROM
information_schema.tables
WHERE
table_schema = 'public';
""")
for attempt in range(max_attempts):
try:
with engine.begin() as conn:
result = conn.execute(sql).fetchall()
tables = [row[0] for row in result]
return tables
except Exception as e:
logger.warning(f"Attempts:{attempt}\nError getting tables: {e}")
time.sleep(2)
raise RuntimeError(f"Error getting tables after {max_attempts} attempts")
def search_similar(query_embedding: list, doc_name: str, k: int) -> list[Chunk]:
table_name = f"{doc_name}"
vec_string = "[" + ",".join(map(str, query_embedding)) + "]"
sql = text(f"""
SELECT
main_title,
chunk_title,
content
FROM {table_name}
ORDER BY (embedding <#> CAST(:embedding AS vector)) asc
LIMIT :k;
""")
with engine.begin() as conn:
rows = conn.execute(sql, {"embedding": vec_string, "k": k}).fetchall()
rows_out = []
for row in rows:
rows_out.append(Chunk(main_title=row[0], chunk_title=row[1], content=row[2]))
return rows_out