# feat: Introduce new backend architecture with notebooks, sources, chat, and
# CLaRa models, alongside database schema and updated deployment scripts, while
# removing old frontend, deployment files, and previous backend components.
# (commit 88f8604)
"""
Antigravity Notebook - Sources Router
API endpoints for source management (PDF upload, URL scraping, text ingestion).
"""
from fastapi import APIRouter, Depends, HTTPException, status, UploadFile, File, Form
from sqlalchemy.orm import Session
from typing import List
from uuid import UUID
from backend.database import get_db, Notebook, Source, LatentTensor
from backend.models.schemas import SourceResponse, SourceURL, SourceText, IngestionStatus
from backend.models.clara import get_clara_model
from backend.services.storage import get_storage_service
from backend.services.ingestion import get_ingestion_service
router = APIRouter(prefix="/sources", tags=["sources"])
@router.post("/notebooks/{notebook_id}/sources/upload", response_model=SourceResponse)
async def upload_pdf(
    notebook_id: UUID,
    file: UploadFile = File(...),
    db: Session = Depends(get_db)
):
    """Upload and ingest a PDF file.

    Args:
        notebook_id: Notebook the source is attached to.
        file: Uploaded file; must carry a ``.pdf`` filename.
        db: SQLAlchemy session (injected).

    Returns:
        SourceResponse including tensor count and total token count.

    Raises:
        HTTPException: 404 if the notebook is missing, 400 for a non-PDF
            upload or an ingestion validation error, 500 for anything else.
    """
    # Verify notebook exists before touching the upload at all.
    notebook = db.query(Notebook).filter(Notebook.id == notebook_id).first()
    if not notebook:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Notebook {notebook_id} not found"
        )
    # Validate file type. UploadFile.filename may be None (e.g. a raw
    # multipart part without a filename), so guard before calling .lower()
    # — otherwise this line raised AttributeError and surfaced as a 500.
    if not file.filename or not file.filename.lower().endswith('.pdf'):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Only PDF files are supported"
        )
    try:
        # Read the whole upload into memory; ingestion consumes raw bytes.
        file_content = await file.read()
        # Get services
        clara = get_clara_model()
        storage = get_storage_service()
        ingestion = get_ingestion_service(clara, storage)
        # Ingest PDF
        source = ingestion.ingest_pdf(
            db=db,
            notebook_id=notebook_id,
            file_content=file_content,
            filename=file.filename
        )
        # Tensor stats for the response payload.
        tensors = db.query(LatentTensor).filter(LatentTensor.source_id == source.id).all()
        tensor_count = len(tensors)
        total_tokens = sum(t.token_count for t in tensors)
        return SourceResponse(
            id=source.id,
            notebook_id=source.notebook_id,
            source_type=source.source_type,
            filename=source.filename,
            url=source.url,
            created_at=source.created_at,
            metadata=source.metadata or {},
            tensor_count=tensor_count,
            total_tokens=total_tokens
        )
    except HTTPException:
        # Don't let the catch-all below rewrap deliberate HTTP errors as 500s.
        raise
    except ValueError as e:
        # Ingestion-level validation problems surface as client errors.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e)
        )
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to process PDF: {str(e)}"
        )
@router.post("/notebooks/{notebook_id}/sources/url", response_model=SourceResponse)
def add_url_source(
    notebook_id: UUID,
    url_data: SourceURL,
    db: Session = Depends(get_db)
):
    """Add a URL as a source"""
    # Guard: the target notebook must exist.
    if db.query(Notebook).filter(Notebook.id == notebook_id).first() is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Notebook {notebook_id} not found"
        )
    try:
        # Wire up the services the ingestion pipeline needs.
        clara_model = get_clara_model()
        storage_service = get_storage_service()
        ingestion_service = get_ingestion_service(clara_model, storage_service)
        # Scrape and ingest the URL content.
        source = ingestion_service.ingest_url(
            db=db,
            notebook_id=notebook_id,
            url=str(url_data.url),
            title=url_data.title
        )
        # Collect tensor stats for the response payload.
        tensor_rows = (
            db.query(LatentTensor)
            .filter(LatentTensor.source_id == source.id)
            .all()
        )
        token_total = sum(row.token_count for row in tensor_rows)
        return SourceResponse(
            id=source.id,
            notebook_id=source.notebook_id,
            source_type=source.source_type,
            filename=source.filename,
            url=source.url,
            created_at=source.created_at,
            metadata=source.metadata or {},
            tensor_count=len(tensor_rows),
            total_tokens=token_total
        )
    except ValueError as exc:
        # Validation failures from ingestion map to client errors.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(exc)
        )
    except Exception as exc:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to process URL: {str(exc)}"
        )
@router.post("/notebooks/{notebook_id}/sources/text", response_model=SourceResponse)
def add_text_source(
    notebook_id: UUID,
    text_data: SourceText,
    db: Session = Depends(get_db)
):
    """Add plain text as a source"""
    # Guard: the target notebook must exist.
    if db.query(Notebook).filter(Notebook.id == notebook_id).first() is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Notebook {notebook_id} not found"
        )
    try:
        # Wire up the services the ingestion pipeline needs.
        clara_model = get_clara_model()
        storage_service = get_storage_service()
        ingestion_service = get_ingestion_service(clara_model, storage_service)
        # Ingest the raw text body.
        source = ingestion_service.ingest_text(
            db=db,
            notebook_id=notebook_id,
            content=text_data.content,
            title=text_data.title
        )
        # Collect tensor stats for the response payload.
        tensor_rows = (
            db.query(LatentTensor)
            .filter(LatentTensor.source_id == source.id)
            .all()
        )
        token_total = sum(row.token_count for row in tensor_rows)
        return SourceResponse(
            id=source.id,
            notebook_id=source.notebook_id,
            source_type=source.source_type,
            filename=source.filename,
            url=source.url,
            created_at=source.created_at,
            metadata=source.metadata or {},
            tensor_count=len(tensor_rows),
            total_tokens=token_total
        )
    except ValueError as exc:
        # Validation failures from ingestion map to client errors.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(exc)
        )
    except Exception as exc:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to process text: {str(exc)}"
        )
@router.get("/notebooks/{notebook_id}/sources", response_model=List[SourceResponse])
def list_sources(notebook_id: UUID, db: Session = Depends(get_db)):
    """List all sources in a notebook with per-source tensor statistics.

    Args:
        notebook_id: Notebook whose sources are listed.
        db: SQLAlchemy session (injected).

    Returns:
        List of SourceResponse, one per source, in query order.

    Raises:
        HTTPException: 404 if the notebook does not exist.
    """
    # Verify notebook exists
    notebook = db.query(Notebook).filter(Notebook.id == notebook_id).first()
    if not notebook:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Notebook {notebook_id} not found"
        )
    sources = db.query(Source).filter(Source.notebook_id == notebook_id).all()
    # Fetch tensors for every source in ONE query instead of one query per
    # source (the previous loop was an N+1 pattern), then aggregate in Python.
    source_ids = [source.id for source in sources]
    stats = {sid: [0, 0] for sid in source_ids}  # source_id -> [count, tokens]
    if source_ids:
        for tensor in db.query(LatentTensor).filter(
            LatentTensor.source_id.in_(source_ids)
        ):
            entry = stats[tensor.source_id]
            entry[0] += 1
            entry[1] += tensor.token_count
    return [
        SourceResponse(
            id=source.id,
            notebook_id=source.notebook_id,
            source_type=source.source_type,
            filename=source.filename,
            url=source.url,
            created_at=source.created_at,
            metadata=source.metadata or {},
            tensor_count=stats[source.id][0],
            total_tokens=stats[source.id][1]
        )
        for source in sources
    ]
@router.get("/sources/{source_id}", response_model=SourceResponse)
def get_source(source_id: UUID, db: Session = Depends(get_db)):
    """Get source details"""
    # Look up the source; 404 when it doesn't exist.
    source = db.query(Source).filter(Source.id == source_id).first()
    if source is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Source {source_id} not found"
        )
    # Aggregate tensor stats for the response payload.
    rows = db.query(LatentTensor).filter(LatentTensor.source_id == source_id).all()
    token_total = sum(row.token_count for row in rows)
    return SourceResponse(
        id=source.id,
        notebook_id=source.notebook_id,
        source_type=source.source_type,
        filename=source.filename,
        url=source.url,
        created_at=source.created_at,
        metadata=source.metadata or {},
        tensor_count=len(rows),
        total_tokens=token_total
    )
@router.delete("/sources/{source_id}", status_code=status.HTTP_204_NO_CONTENT)
def delete_source(source_id: UUID, db: Session = Depends(get_db)):
    """Delete a source and all associated tensors"""
    # Look up the source; 404 when it doesn't exist.
    target = db.query(Source).filter(Source.id == source_id).first()
    if target is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Source {source_id} not found"
        )
    # Remove tensor files from disk first, then delete the source row —
    # the DB cascade takes care of the latent_tensors rows.
    get_storage_service().delete_source_tensors(db, source_id, target.notebook_id)
    db.delete(target)
    db.commit()
    return None