# translation_app/backend/services/context_manager.py
# Author: Athena1621 — commit 88f8604
# feat: Introduce new backend architecture with notebooks, sources, chat, and
# CLaRa models, alongside database schema and updated deployment scripts, while
# removing old frontend, deployment files, and previous backend components.
"""
Antigravity Notebook - Context Manager
The "Brain" that makes whole-notebook reasoning possible.
This service implements the key NotebookLM functionality:
- If the notebook fits in context: Load EVERYTHING (true whole-notebook reasoning)
- If the notebook is too large: Intelligently select the most relevant parts
"""
import torch
from typing import List, Dict, Tuple
from sqlalchemy.orm import Session
from uuid import UUID
from backend.config import settings
from backend.models.clara import ClaraModel
from backend.services.storage import StorageService
class ContextManager:
    """
    Manages context preparation for notebook queries.

    The magic of NotebookLM is "whole-context awareness". Since CLaRa
    compresses text by ~16x, we can fit 10-20 books worth of content
    in a single context window.
    """

    def __init__(
        self,
        clara: ClaraModel,
        storage: StorageService,
        max_tokens: int = None
    ):
        """
        Args:
            clara: Model providing token counting and latent ranking.
            storage: Service that loads compressed tensors for a notebook.
            max_tokens: Context budget; defaults to settings.MAX_CONTEXT_TOKENS.
        """
        self.clara = clara
        self.storage = storage
        # `or` also replaces an explicit 0 with the default, which is desirable:
        # a zero-token budget would make every selection impossible.
        self.max_tokens = max_tokens or settings.MAX_CONTEXT_TOKENS
        print(f"βœ… ContextManager initialized (max tokens: {self.max_tokens})")

    def prepare_notebook_context(
        self,
        db: Session,
        notebook_id: UUID,
        query: str
    ) -> Tuple[torch.Tensor, List[Dict], Dict]:
        """
        The Magic Function: Prepares context for a notebook query.

        This is where the NotebookLM magic happens. We decide what
        memories to load into the AI's brain.

        Args:
            db: Database session
            notebook_id: Notebook UUID
            query: User query

        Returns:
            Tuple of:
            - Combined latent tensor (stacked context)
            - List of source metadata (for citations)
            - Statistics dict (token usage, etc.)

        Raises:
            ValueError: If the notebook has no stored tensors.
        """
        print(f"\n🧠 Preparing context for notebook {notebook_id}")

        # 1. Fetch ALL compressed tensors for this notebook
        all_tensor_data = self.storage.get_notebook_tensors(db, notebook_id)
        if not all_tensor_data:
            raise ValueError(f"No tensors found for notebook {notebook_id}")
        print(f"πŸ“š Found {len(all_tensor_data)} tensor segments across sources")

        # Extract tensors and the parallel per-segment metadata used for citations.
        all_tensors = [td["tensor"] for td in all_tensor_data]
        source_map = [
            {
                "source_id": str(td["source_id"]),
                "filename": td["source_filename"],
                "source_type": td["source_type"],
                "segment_index": td["segment_index"]
            }
            for td in all_tensor_data
        ]

        # 2. Calculate total token count
        total_tokens = sum(self.clara.get_token_count(t) for t in all_tensors)
        print(f"πŸ“Š Total tokens: {total_tokens} / {self.max_tokens} max")

        # 3. Decision: Whole-notebook vs. Selective
        if total_tokens <= self.max_tokens:
            # ✨ SCENARIO A: The "Whole Notebook" fits!
            # We stack them all. The AI reads EVERYTHING.
            print("βœ… Full notebook fits! Using WHOLE-NOTEBOOK reasoning")
            # NOTE(review): assumes latents are (batch, seq, ...) so dim=1 is
            # the sequence axis — confirm against ClaraModel's tensor layout.
            combined_context = torch.cat(all_tensors, dim=1)
            selected_sources = source_map
            strategy = "full_notebook"
        else:
            # 🎯 SCENARIO B: Too big (e.g., 50 books)
            # We must use CLaRa's retrieval to pick the best parts
            print(f"⚠️ Notebook too large ({total_tokens} tokens). Using SELECTIVE retrieval")
            combined_context, selected_sources = self._selective_retrieval(
                query,
                all_tensors,
                source_map
            )
            strategy = "selective_retrieval"

        # 4. Generate statistics. Count the selected tokens once instead of
        # calling get_token_count twice on the same tensor.
        selected_tokens = self.clara.get_token_count(combined_context)
        stats = {
            "total_segments": len(all_tensor_data),
            "total_tokens": total_tokens,
            "selected_segments": len(selected_sources),
            "selected_tokens": selected_tokens,
            "max_tokens": self.max_tokens,
            "context_usage_percent": round(
                (selected_tokens / self.max_tokens) * 100,
                2
            ),
            "strategy": strategy,
            "can_fit_full_context": total_tokens <= self.max_tokens
        }
        print(f"πŸ“ˆ Context prepared: {stats['selected_tokens']} tokens ({stats['context_usage_percent']}% usage)")
        return combined_context, selected_sources, stats

    def _selective_retrieval(
        self,
        query: str,
        tensors: List[torch.Tensor],
        source_map: List[Dict]
    ) -> Tuple[torch.Tensor, List[Dict]]:
        """
        Selective retrieval: Pick the most relevant tensors that fit in budget.

        Uses CLaRa's ranking to score tensors by relevance to the query,
        then greedily selects the highest-scoring tensors until we hit
        the token budget.

        Args:
            query: User query
            tensors: All available tensors
            source_map: Metadata for each tensor (parallel to `tensors`)

        Returns:
            Tuple of (combined tensor, selected source metadata)
        """
        print("πŸ” Ranking tensors by relevance...")

        # Score all tensors against the query
        scores = self.clara.rank_latents(query, tensors)

        # Pair each tensor with its metadata, score, and token cost.
        scored_tensors = [
            {
                "tensor": tensor,
                "source": source,
                "score": score,
                "tokens": self.clara.get_token_count(tensor)
            }
            for tensor, source, score in zip(tensors, source_map, scores)
        ]

        # Sort by score (highest first)
        scored_tensors.sort(key=lambda x: x["score"], reverse=True)

        # Greedy selection (knapsack-style): take the best-scoring segments
        # that still fit; segments that would overflow the budget are skipped,
        # but later (smaller) segments may still be admitted.
        selected = []
        total_tokens = 0
        for item in scored_tensors:
            if total_tokens + item["tokens"] <= self.max_tokens:
                selected.append(item)
                total_tokens += item["tokens"]

        if not selected:
            # BUG FIX: the previous fallback returned tensors[0] with an EMPTY
            # source list, so the returned context and its citation metadata
            # disagreed. If even the best segment exceeds the budget, return
            # that single most-relevant segment together with its source entry
            # so callers always get consistent (context, sources).
            selected = [scored_tensors[0]]
            total_tokens = scored_tensors[0]["tokens"]

        print(f"βœ… Selected {len(selected)}/{len(tensors)} segments ({total_tokens} tokens)")

        # Combine selected tensors along the sequence axis.
        selected_tensors = [s["tensor"] for s in selected]
        selected_sources = [s["source"] for s in selected]
        combined = torch.cat(selected_tensors, dim=1)
        return combined, selected_sources

    def get_notebook_stats(
        self,
        db: Session,
        notebook_id: UUID
    ) -> Dict:
        """
        Get statistics about a notebook's context usage.

        Useful for showing users how much of their context budget
        is being used (like the memory meter in the UI).

        Args:
            db: Database session
            notebook_id: Notebook UUID

        Returns:
            Statistics dictionary
        """
        all_tensor_data = self.storage.get_notebook_tensors(db, notebook_id)
        if not all_tensor_data:
            # Empty notebook: report a zeroed meter rather than dividing by
            # anything or raising — the UI treats this as "nothing loaded yet".
            return {
                "total_segments": 0,
                "total_tokens": 0,
                "max_tokens": self.max_tokens,
                "context_usage_percent": 0.0,
                "can_fit_full_context": True
            }
        total_tokens = sum(
            self.clara.get_token_count(td["tensor"]) for td in all_tensor_data
        )
        return {
            "total_segments": len(all_tensor_data),
            "total_tokens": total_tokens,
            "max_tokens": self.max_tokens,
            "context_usage_percent": round((total_tokens / self.max_tokens) * 100, 2),
            "can_fit_full_context": total_tokens <= self.max_tokens
        }
def get_context_manager(
    clara: ClaraModel,
    storage: StorageService
) -> ContextManager:
    """Build a ContextManager wired to the given CLaRa model and storage service."""
    manager = ContextManager(clara, storage)
    return manager