Commit ·
cd06c0f
1
Parent(s): 7b2c669
[KM-556] move parquet_service from knowledge/ to storage/
Browse files
src/knowledge/processing_service.py
CHANGED
|
@@ -7,7 +7,7 @@ from src.storage.az_blob.az_blob import blob_storage
|
|
| 7 |
from src.db.postgres.models import Document as DBDocument
|
| 8 |
from sqlalchemy.ext.asyncio import AsyncSession
|
| 9 |
from src.middlewares.logging import get_logger
|
| 10 |
-
from src.knowledge.parquet_service import upload_parquet
|
| 11 |
from typing import List
|
| 12 |
from datetime import datetime, timezone, timedelta
|
| 13 |
import sys
|
|
|
|
| 7 |
from src.db.postgres.models import Document as DBDocument
|
| 8 |
from sqlalchemy.ext.asyncio import AsyncSession
|
| 9 |
from src.middlewares.logging import get_logger
|
| 10 |
+
from src.storage.parquet import upload_parquet
|
| 11 |
from typing import List
|
| 12 |
from datetime import datetime, timezone, timedelta
|
| 13 |
import sys
|
src/query/executor/tabular.py
CHANGED
|
@@ -20,7 +20,7 @@ from typing import Any
|
|
| 20 |
import pandas as pd
|
| 21 |
|
| 22 |
from ...catalog.models import Catalog, Source, Table
|
| 23 |
-
from ...knowledge.parquet_service import parquet_blob_name
|
| 24 |
from ...middlewares.logging import get_logger
|
| 25 |
from ..compiler.pandas import CompiledPandas, PandasCompiler
|
| 26 |
from ..ir.models import QueryIR
|
|
|
|
| 20 |
import pandas as pd
|
| 21 |
|
| 22 |
from ...catalog.models import Catalog, Source, Table
|
| 23 |
+
from ...storage.parquet import parquet_blob_name
|
| 24 |
from ...middlewares.logging import get_logger
|
| 25 |
from ..compiler.pandas import CompiledPandas, PandasCompiler
|
| 26 |
from ..ir.models import QueryIR
|
src/query/executors/tabular.py
CHANGED
|
@@ -19,7 +19,7 @@ from pydantic import BaseModel
|
|
| 19 |
from sqlalchemy.ext.asyncio import AsyncSession
|
| 20 |
|
| 21 |
from src.config.settings import settings
|
| 22 |
-
from src.knowledge.parquet_service import download_parquet
|
| 23 |
from src.middlewares.logging import get_logger
|
| 24 |
from src.query.base import BaseExecutor, QueryResult
|
| 25 |
from src.retrieval.base import RetrievalResult
|
|
|
|
| 19 |
from sqlalchemy.ext.asyncio import AsyncSession
|
| 20 |
|
| 21 |
from src.config.settings import settings
|
| 22 |
+
from src.storage.parquet import download_parquet
|
| 23 |
from src.middlewares.logging import get_logger
|
| 24 |
from src.query.base import BaseExecutor, QueryResult
|
| 25 |
from src.retrieval.base import RetrievalResult
|
src/{knowledge/parquet_service.py → storage/parquet.py}
RENAMED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"""Parquet
|
| 2 |
|
| 3 |
Parquet files are stored in Azure Blob alongside the original document using
|
| 4 |
a deterministic naming convention based on document_id:
|
|
|
|
| 1 |
+
"""Parquet storage helpers — converts, uploads, downloads, and deletes Parquet files for CSV/XLSX.
|
| 2 |
|
| 3 |
Parquet files are stored in Azure Blob alongside the original document using
|
| 4 |
a deterministic naming convention based on document_id:
|