Spaces:
Configuration error
Configuration error
feat: Introduce new backend architecture with notebooks, sources, chat, and CLaRa models, alongside database schema and updated deployment scripts, while removing old frontend, deployment files, and previous backend components.
88f8604 | """ | |
| Antigravity Notebook - Sources Router | |
| API endpoints for source management (PDF upload, URL scraping, text ingestion). | |
| """ | |
| from fastapi import APIRouter, Depends, HTTPException, status, UploadFile, File, Form | |
| from sqlalchemy.orm import Session | |
| from typing import List | |
| from uuid import UUID | |
| from backend.database import get_db, Notebook, Source, LatentTensor | |
| from backend.models.schemas import SourceResponse, SourceURL, SourceText, IngestionStatus | |
| from backend.models.clara import get_clara_model | |
| from backend.services.storage import get_storage_service | |
| from backend.services.ingestion import get_ingestion_service | |
# Router object for this module, created with the "/sources" prefix and the "sources" tag.
| router = APIRouter(prefix="/sources", tags=["sources"]) | |
async def upload_pdf(
    notebook_id: UUID,
    file: UploadFile = File(...),
    db: Session = Depends(get_db)
):
    """Upload a PDF file and ingest it into a notebook.

    Args:
        notebook_id: ID of the notebook that receives the new source.
        file: Uploaded file; its name must end in ``.pdf`` (case-insensitive).
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A ``SourceResponse`` for the ingested source, including the number
        of latent tensors recorded for it and the sum of their token counts.

    Raises:
        HTTPException: 404 if the notebook does not exist; 400 if the upload
            has no ``.pdf`` filename or ingestion raises ``ValueError``;
            500 for any other failure while processing the file.
    """
    # Verify notebook exists
    notebook = db.query(Notebook).filter(Notebook.id == notebook_id).first()
    if not notebook:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Notebook {notebook_id} not found"
        )

    # Validate file type. An upload may arrive without a filename, so guard
    # against None/empty before calling string methods on it.
    filename = file.filename
    if not filename or not filename.lower().endswith('.pdf'):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Only PDF files are supported"
        )

    try:
        # Read file content
        file_content = await file.read()

        # Get services
        clara = get_clara_model()
        storage = get_storage_service()
        ingestion = get_ingestion_service(clara, storage)

        # Ingest PDF
        # NOTE(review): ingest_pdf is called synchronously from an async
        # handler; if it is long-running it will block the event loop --
        # consider moving it to a worker thread.
        source = ingestion.ingest_pdf(
            db=db,
            notebook_id=notebook_id,
            file_content=file_content,
            filename=filename
        )

        # Get tensor stats
        tensors = db.query(LatentTensor).filter(LatentTensor.source_id == source.id).all()
        tensor_count = len(tensors)
        total_tokens = sum(t.token_count for t in tensors)

        return SourceResponse(
            id=source.id,
            notebook_id=source.notebook_id,
            source_type=source.source_type,
            filename=source.filename,
            url=source.url,
            created_at=source.created_at,
            metadata=source.metadata or {},
            tensor_count=tensor_count,
            total_tokens=total_tokens
        )
    except HTTPException:
        # Keep the status code of any HTTP error raised further down the
        # stack instead of rewriting it as a 500 in the generic handler.
        raise
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e)
        ) from e
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to process PDF: {str(e)}"
        ) from e
def add_url_source(
    notebook_id: UUID,
    url_data: SourceURL,
    db: Session = Depends(get_db)
):
    """Add a URL as a source of a notebook.

    Args:
        notebook_id: ID of the notebook that receives the new source.
        url_data: Request payload; its ``url`` (converted to ``str``) and
            ``title`` are passed to the ingestion service.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A ``SourceResponse`` for the ingested source, including the number
        of latent tensors recorded for it and the sum of their token counts.

    Raises:
        HTTPException: 404 if the notebook does not exist; 400 if ingestion
            raises ``ValueError``; 500 for any other failure.
    """
    # Verify notebook exists
    notebook = db.query(Notebook).filter(Notebook.id == notebook_id).first()
    if not notebook:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Notebook {notebook_id} not found"
        )

    try:
        # Get services
        clara = get_clara_model()
        storage = get_storage_service()
        ingestion = get_ingestion_service(clara, storage)

        # Ingest URL
        source = ingestion.ingest_url(
            db=db,
            notebook_id=notebook_id,
            url=str(url_data.url),
            title=url_data.title
        )

        # Get tensor stats
        tensors = db.query(LatentTensor).filter(LatentTensor.source_id == source.id).all()
        tensor_count = len(tensors)
        total_tokens = sum(t.token_count for t in tensors)

        return SourceResponse(
            id=source.id,
            notebook_id=source.notebook_id,
            source_type=source.source_type,
            filename=source.filename,
            url=source.url,
            created_at=source.created_at,
            metadata=source.metadata or {},
            tensor_count=tensor_count,
            total_tokens=total_tokens
        )
    except HTTPException:
        # Keep the status code of any HTTP error raised further down the
        # stack instead of rewriting it as a 500 in the generic handler.
        raise
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e)
        ) from e
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to process URL: {str(e)}"
        ) from e
def add_text_source(
    notebook_id: UUID,
    text_data: SourceText,
    db: Session = Depends(get_db)
):
    """Add plain text as a source of a notebook.

    Args:
        notebook_id: ID of the notebook that receives the new source.
        text_data: Request payload; its ``content`` and ``title`` are passed
            to the ingestion service.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A ``SourceResponse`` for the ingested source, including the number
        of latent tensors recorded for it and the sum of their token counts.

    Raises:
        HTTPException: 404 if the notebook does not exist; 400 if ingestion
            raises ``ValueError``; 500 for any other failure.
    """
    # Verify notebook exists
    notebook = db.query(Notebook).filter(Notebook.id == notebook_id).first()
    if not notebook:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Notebook {notebook_id} not found"
        )

    try:
        # Get services
        clara = get_clara_model()
        storage = get_storage_service()
        ingestion = get_ingestion_service(clara, storage)

        # Ingest text
        source = ingestion.ingest_text(
            db=db,
            notebook_id=notebook_id,
            content=text_data.content,
            title=text_data.title
        )

        # Get tensor stats
        tensors = db.query(LatentTensor).filter(LatentTensor.source_id == source.id).all()
        tensor_count = len(tensors)
        total_tokens = sum(t.token_count for t in tensors)

        return SourceResponse(
            id=source.id,
            notebook_id=source.notebook_id,
            source_type=source.source_type,
            filename=source.filename,
            url=source.url,
            created_at=source.created_at,
            metadata=source.metadata or {},
            tensor_count=tensor_count,
            total_tokens=total_tokens
        )
    except HTTPException:
        # Keep the status code of any HTTP error raised further down the
        # stack instead of rewriting it as a 500 in the generic handler.
        raise
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e)
        ) from e
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to process text: {str(e)}"
        ) from e
def list_sources(notebook_id: UUID, db: Session = Depends(get_db)):
    """List all sources in a notebook together with their tensor statistics.

    Args:
        notebook_id: ID of the notebook whose sources are listed.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A list with one ``SourceResponse`` per source of the notebook; empty
        if the notebook has no sources.

    Raises:
        HTTPException: 404 if the notebook does not exist.
    """
    # Verify notebook exists
    notebook = db.query(Notebook).filter(Notebook.id == notebook_id).first()
    if not notebook:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Notebook {notebook_id} not found"
        )

    sources = db.query(Source).filter(Source.notebook_id == notebook_id).all()
    if not sources:
        return []

    # Load the tensors of every source of the notebook with one joined query
    # rather than issuing a separate query per source.
    tensors = (
        db.query(LatentTensor)
        .join(Source, LatentTensor.source_id == Source.id)
        .filter(Source.notebook_id == notebook_id)
        .all()
    )

    # Aggregate per source: number of tensors and sum of their token counts.
    tensor_counts = {}
    token_totals = {}
    for tensor in tensors:
        key = tensor.source_id
        tensor_counts[key] = tensor_counts.get(key, 0) + 1
        token_totals[key] = token_totals.get(key, 0) + tensor.token_count

    return [
        SourceResponse(
            id=source.id,
            notebook_id=source.notebook_id,
            source_type=source.source_type,
            filename=source.filename,
            url=source.url,
            created_at=source.created_at,
            metadata=source.metadata or {},
            # Sources without any tensor report zero for both statistics.
            tensor_count=tensor_counts.get(source.id, 0),
            total_tokens=token_totals.get(source.id, 0)
        )
        for source in sources
    ]
def get_source(source_id: UUID, db: Session = Depends(get_db)):
    """Return the details of a single source.

    Args:
        source_id: ID of the source to look up.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A ``SourceResponse`` for the source, including the number of latent
        tensors recorded for it and the sum of their token counts.

    Raises:
        HTTPException: 404 if no source with ``source_id`` exists.
    """
    record = db.query(Source).filter(Source.id == source_id).first()
    if not record:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Source {source_id} not found"
        )

    # Tensor statistics: row count plus accumulated token count.
    latent_rows = (
        db.query(LatentTensor)
        .filter(LatentTensor.source_id == source_id)
        .all()
    )
    token_sum = 0
    for row in latent_rows:
        token_sum += row.token_count

    return SourceResponse(
        id=record.id,
        notebook_id=record.notebook_id,
        source_type=record.source_type,
        filename=record.filename,
        url=record.url,
        created_at=record.created_at,
        metadata=record.metadata or {},
        tensor_count=len(latent_rows),
        total_tokens=token_sum
    )
def delete_source(source_id: UUID, db: Session = Depends(get_db)):
    """Delete a source along with its stored tensors.

    Args:
        source_id: ID of the source to remove.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        None.

    Raises:
        HTTPException: 404 if no source with ``source_id`` exists.
    """
    target = db.query(Source).filter(Source.id == source_id).first()
    if not target:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Source {source_id} not found"
        )

    # Storage cleanup runs first, while the source row is still present.
    get_storage_service().delete_source_tensors(db, source_id, target.notebook_id)

    # Remove the source row itself and persist the change.
    db.delete(target)
    db.commit()
    return None