Spaces:
Sleeping
Sleeping
| from sqlalchemy import create_engine, text | |
| import time | |
| from loguru import logger | |
| from config import DATABASE_URL, Chunk | |
# Build the module-wide SQLAlchemy engine at import time. A missing or
# malformed URL aborts the import with ValueError, so nothing in this module
# can run without a usable engine.
if not DATABASE_URL:
    logger.error("DB_URL is not set. Database features will be disabled.")
    raise ValueError("DB_URL is not set. Please set the DB_URL environment variable.")

try:
    engine = create_engine(DATABASE_URL)
except Exception as exc:
    # Only the URL's length is logged, presumably because the URL itself can
    # embed credentials.
    logger.exception(
        f"Invalid DB_URL format. Length of DB_URL: {len(DATABASE_URL)}"
    )
    # Chain explicitly so the traceback shows the engine error as the cause
    # rather than as an unrelated "during handling" context.
    raise ValueError(
        "Invalid DB_URL format. Please check your env variables."
    ) from exc
def get_tables(max_attempts: int = 3) -> list[str]:
    """Return the names of all tables in the ``public`` schema.

    The query is retried on any error, pausing two seconds between
    attempts (but not after the last one).

    Args:
        max_attempts: Maximum number of times to run the query.

    Returns:
        The ``table_name`` values read from ``information_schema.tables``.

    Raises:
        RuntimeError: If every attempt fails, or if ``max_attempts`` is not
            positive. The last underlying error, when there is one, is
            chained as ``__cause__``.
    """
    sql = text("""
        SELECT
            table_name
        FROM
            information_schema.tables
        WHERE
            table_schema = 'public';
    """)
    last_error = None
    # Count attempts from 1 so the warning reads naturally ("Attempts:1").
    for attempt in range(1, max_attempts + 1):
        try:
            with engine.begin() as conn:
                result = conn.execute(sql).fetchall()
            return [row[0] for row in result]
        except Exception as e:
            last_error = e
            logger.warning(f"Attempts:{attempt}\nError getting tables: {e}")
            # Sleeping after the final failure would only delay the error.
            if attempt < max_attempts:
                time.sleep(2)
    raise RuntimeError(
        f"Error getting tables after {max_attempts} attempts"
    ) from last_error
def search_similar(query_embedding: list, doc_name: str, k: int) -> list[Chunk]:
    """Return the ``k`` chunks of a document table closest to an embedding.

    Args:
        query_embedding: Components of the query vector; they are serialised
            as ``[v1,v2,...]`` and cast to ``vector`` inside the query.
        doc_name: Name of the table to search. It is spliced into the SQL
            text, so it must be a valid identifier: plain or double-quoted,
            optionally schema-qualified.
        k: Maximum number of rows to return.

    Returns:
        One ``Chunk`` per row, in the order produced by the query.

    Raises:
        ValueError: If ``doc_name`` is not a well-formed identifier.
    """
    # Function-scope import: `re` is not among the imports at the top of the file.
    import re

    # Table names cannot be bound parameters, so reject anything that is not
    # a bare or double-quoted identifier before it reaches the SQL string.
    ident = r'(?:[^\W\d][\w$]*|"(?:[^"\x00]|"")+")'
    if not re.fullmatch(rf"{ident}(?:\.{ident})*", doc_name):
        raise ValueError(f"Invalid table name: {doc_name!r}")
    table_name = doc_name

    vec_string = "[" + ",".join(map(str, query_embedding)) + "]"
    # `<#>` is pgvector's negative inner product operator, so ascending order
    # puts the largest inner product first.
    sql = text(f"""
        SELECT
            main_title,
            chunk_title,
            content
        FROM {table_name}
        ORDER BY (embedding <#> CAST(:embedding AS vector)) asc
        LIMIT :k;
    """)
    with engine.begin() as conn:
        rows = conn.execute(sql, {"embedding": vec_string, "k": k}).fetchall()
    return [
        Chunk(main_title=row[0], chunk_title=row[1], content=row[2])
        for row in rows
    ]