Spaces:
Sleeping
Sleeping
Initial WorkWise backend GPU
Browse files- app/routes/ingest_routes.py +45 -10
app/routes/ingest_routes.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
| 1 |
"""Routes for data ingestion"""
|
|
|
|
|
|
|
| 2 |
import spaces
|
| 3 |
-
from fastapi import APIRouter, HTTPException
|
| 4 |
-
from app.models.jira_schema import IngestRequest, IngestResponse
|
| 5 |
from app.services.data_ingestion import DataIngestionService
|
| 6 |
from app.services.embeddings import embedding_service
|
| 7 |
from app.services.vector_store import vector_store
|
|
@@ -11,19 +13,41 @@ logger = setup_logger(__name__)
|
|
| 11 |
router = APIRouter()
|
| 12 |
|
| 13 |
@router.post("/ingest", response_model=IngestResponse)
|
| 14 |
-
async def ingest_data(request: IngestRequest):
|
| 15 |
"""
|
| 16 |
-
Ingest Jira data from CSV/JSON file
|
| 17 |
|
|
|
|
| 18 |
- Parses the file
|
| 19 |
- Generates embeddings
|
| 20 |
-
- Stores in
|
| 21 |
"""
|
|
|
|
|
|
|
| 22 |
try:
|
| 23 |
-
logger.info(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
| 27 |
|
| 28 |
if not records:
|
| 29 |
raise HTTPException(status_code=400, detail="No records found in file")
|
|
@@ -40,14 +64,25 @@ async def ingest_data(request: IngestRequest):
|
|
| 40 |
# Store vectors
|
| 41 |
count = vector_store.upsert_vectors(embeddings, records)
|
| 42 |
|
| 43 |
-
logger.info(f"Successfully indexed {count} records")
|
| 44 |
|
| 45 |
return IngestResponse(
|
| 46 |
status="success",
|
| 47 |
records_indexed=count,
|
| 48 |
-
message=f"Successfully ingested and indexed {count} Jira tickets"
|
| 49 |
)
|
| 50 |
|
|
|
|
|
|
|
| 51 |
except Exception as e:
|
| 52 |
logger.error(f"Ingestion failed: {str(e)}")
|
| 53 |
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""Routes for data ingestion"""
|
| 2 |
+
import os
|
| 3 |
+
import tempfile
|
| 4 |
import spaces
|
| 5 |
+
from fastapi import APIRouter, HTTPException, UploadFile, File
|
| 6 |
+
from app.models.jira_schema import IngestResponse
|
| 7 |
from app.services.data_ingestion import DataIngestionService
|
| 8 |
from app.services.embeddings import embedding_service
|
| 9 |
from app.services.vector_store import vector_store
|
|
|
|
| 13 |
router = APIRouter()
|
| 14 |
|
| 15 |
@router.post("/ingest", response_model=IngestResponse)
|
| 16 |
+
async def ingest_data(file: UploadFile = File(...)):
|
| 17 |
"""
|
| 18 |
+
Ingest Jira data from uploaded CSV/JSON file
|
| 19 |
|
| 20 |
+
- Accepts file upload
|
| 21 |
- Parses the file
|
| 22 |
- Generates embeddings
|
| 23 |
+
- Stores in vector database
|
| 24 |
"""
|
| 25 |
+
temp_file_path = None
|
| 26 |
+
|
| 27 |
try:
|
| 28 |
+
logger.info(f"Receiving file upload: {file.filename}")
|
| 29 |
+
|
| 30 |
+
# Validate file type
|
| 31 |
+
if not file.filename.endswith(('.csv', '.json')):
|
| 32 |
+
raise HTTPException(
|
| 33 |
+
status_code=400,
|
| 34 |
+
detail="Invalid file type. Only CSV and JSON files are supported."
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
# Create temporary file to store upload
|
| 38 |
+
suffix = os.path.splitext(file.filename)[1]
|
| 39 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
|
| 40 |
+
temp_file_path = temp_file.name
|
| 41 |
+
|
| 42 |
+
# Read and write uploaded file content
|
| 43 |
+
contents = await file.read()
|
| 44 |
+
temp_file.write(contents)
|
| 45 |
+
temp_file.flush()
|
| 46 |
|
| 47 |
+
logger.info(f"File saved temporarily at: {temp_file_path}")
|
| 48 |
+
|
| 49 |
+
# Load data from temporary file
|
| 50 |
+
records = DataIngestionService.load_data(temp_file_path)
|
| 51 |
|
| 52 |
if not records:
|
| 53 |
raise HTTPException(status_code=400, detail="No records found in file")
|
|
|
|
| 64 |
# Store vectors
|
| 65 |
count = vector_store.upsert_vectors(embeddings, records)
|
| 66 |
|
| 67 |
+
logger.info(f"Successfully indexed {count} records from {file.filename}")
|
| 68 |
|
| 69 |
return IngestResponse(
|
| 70 |
status="success",
|
| 71 |
records_indexed=count,
|
| 72 |
+
message=f"Successfully ingested and indexed {count} Jira tickets from {file.filename}"
|
| 73 |
)
|
| 74 |
|
| 75 |
+
except HTTPException:
|
| 76 |
+
raise
|
| 77 |
except Exception as e:
|
| 78 |
logger.error(f"Ingestion failed: {str(e)}")
|
| 79 |
raise HTTPException(status_code=500, detail=str(e))
|
| 80 |
+
|
| 81 |
+
finally:
|
| 82 |
+
# Clean up temporary file
|
| 83 |
+
if temp_file_path and os.path.exists(temp_file_path):
|
| 84 |
+
try:
|
| 85 |
+
os.unlink(temp_file_path)
|
| 86 |
+
logger.info(f"Cleaned up temporary file: {temp_file_path}")
|
| 87 |
+
except Exception as e:
|
| 88 |
+
logger.warning(f"Failed to delete temporary file: {e}")
|