# Custom Cascade R-CNN detector with chart-metadata prediction heads.
import torch
import torch.nn as nn

from mmdet.models.detectors import CascadeRCNN
from mmdet.registry import MODELS
from mmdet.structures import DetDataSample
class CustomCascadeWithMeta(CascadeRCNN):
    """Cascade R-CNN extended with chart-metadata prediction heads.

    Besides the standard two-stage detection pipeline, the detector pools
    the last feature map into a per-image global vector, regresses the
    number of data points in the chart from it, and feeds the
    (feature, normalized-count) concatenation into optional metadata heads
    (chart classification, plot-area regression, axes info, data series).
    """

    def __init__(self,
                 *args,
                 chart_cls_head=None,
                 plot_reg_head=None,
                 axes_info_head=None,
                 data_series_head=None,
                 data_points_count_head=None,
                 coordinate_standardization=None,
                 data_series_config=None,
                 axis_aware_feature=None,
                 **kwargs):
        """Build the base detector and the optional metadata heads.

        Args:
            chart_cls_head (dict, optional): Config of the chart-type
                classification head; built with ``MODELS.build``.
            plot_reg_head (dict, optional): Config of the plot bounding-box
                regression head.
            axes_info_head (dict, optional): Config of the axes-information
                head.
            data_series_head (dict, optional): Config of the data-series
                head.
            data_points_count_head (dict, optional): Config of the
                data-point-count head. When ``None``, a small default MLP
                regressor is created instead.
            coordinate_standardization (dict, optional): Stored verbatim;
                not interpreted by this class.
            data_series_config (dict, optional): Stored verbatim; not
                interpreted by this class.
            axis_aware_feature (dict, optional): Stored verbatim; not
                interpreted by this class.
        """
        super().__init__(*args, **kwargs)
        # Optional heads: only create the attribute when a config is given,
        # so the hasattr checks in forward_train/simple_test stay meaningful.
        if chart_cls_head is not None:
            self.chart_cls_head = MODELS.build(chart_cls_head)
        if plot_reg_head is not None:
            self.plot_reg_head = MODELS.build(plot_reg_head)
        if axes_info_head is not None:
            self.axes_info_head = MODELS.build(axes_info_head)
        if data_series_head is not None:
            self.data_series_head = MODELS.build(data_series_head)
        if data_points_count_head is not None:
            self.data_points_count_head = MODELS.build(data_points_count_head)
        else:
            # Default regression head for the data-point count.
            # NOTE(review): the 2048 input width assumes pooled ResNet-50 C5
            # features; with an FPN neck the pooled channel count is
            # typically 256 -- confirm against the actual backbone/neck.
            self.data_points_count_head = nn.Sequential(
                nn.Linear(2048, 512),
                nn.ReLU(),
                nn.Dropout(0.1),
                nn.Linear(512, 1))  # single scalar: predicted count
        # Plain configuration storage for downstream consumers.
        self.coordinate_standardization = coordinate_standardization
        self.data_series_config = data_series_config
        self.axis_aware_feature = axis_aware_feature

    def forward_train(self, img, img_metas, gt_bboxes, gt_labels, **kwargs):
        """Compute detection and metadata losses for one training step.

        Args:
            img (Tensor): Batched input images.
            img_metas (list[dict]): Per-image meta info. Ground-truth data
                point counts are read from
                ``img_meta['img_info']['num_data_points']`` (0 when absent).
            gt_bboxes (list[Tensor]): Ground-truth boxes per image.
            gt_labels (list[Tensor]): Ground-truth labels per image.

        Returns:
            dict: Loss dict combining RPN, ROI and metadata losses.
        """
        x = self.extract_feat(img)
        losses = dict()

        # --- RPN forward and loss ---
        if self.with_rpn:
            proposal_cfg = self.train_cfg.get('rpn_proposal',
                                              self.test_cfg.rpn)
            # NOTE(review): ``ann_weight`` is not a standard RPN-head
            # argument; this assumes a customized RPN head that accepts it.
            rpn_losses, proposal_list = self.rpn_head.forward_train(
                x,
                img_metas,
                gt_bboxes,
                gt_labels=None,
                ann_weight=None,
                proposal_cfg=proposal_cfg)
            losses.update(rpn_losses)
        else:
            # Fix: pop (not get) so 'proposals' is not forwarded a second
            # time to the ROI head through **kwargs, which would raise an
            # unexpected-keyword TypeError.
            proposal_list = kwargs.pop('proposals', None)

        # --- ROI forward and loss ---
        roi_losses = self.roi_head.forward_train(x, img_metas, proposal_list,
                                                 gt_bboxes, gt_labels,
                                                 **kwargs)
        losses.update(roi_losses)

        # --- Data-point count regression on globally pooled features ---
        global_feat = x[-1].mean(dim=[2, 3])  # (N, C) global average pool

        gt_counts = [
            img_meta.get('img_info', {}).get('num_data_points', 0)
            for img_meta in img_metas
        ]
        gt_counts = torch.tensor(
            gt_counts, dtype=torch.float32, device=global_feat.device)

        pred_counts = self.data_points_count_head(global_feat).squeeze(-1)
        losses['data_points_count_loss'] = nn.functional.mse_loss(
            pred_counts, gt_counts)

        # Augment the global feature with the soft-normalized predicted
        # count; sigmoid(count / 100) maps typical counts into (0, 1).
        normalized_counts = torch.sigmoid(pred_counts / 100.0)
        enhanced_global_feat = torch.cat(
            [global_feat, normalized_counts.unsqueeze(-1)], dim=-1)

        # --- Optional metadata heads ---
        # NOTE(review): each head is called directly on the feature and the
        # return value is stored as a loss -- assumes the heads compute
        # their own loss in training mode; verify the head implementations.
        if hasattr(self, 'chart_cls_head'):
            losses['chart_cls_loss'] = self.chart_cls_head(
                enhanced_global_feat)
        if hasattr(self, 'plot_reg_head'):
            losses['plot_reg_loss'] = self.plot_reg_head(enhanced_global_feat)
        if hasattr(self, 'axes_info_head'):
            losses['axes_info_loss'] = self.axes_info_head(
                enhanced_global_feat)
        if hasattr(self, 'data_series_head'):
            losses['data_series_loss'] = self.data_series_head(
                enhanced_global_feat)
        return losses

    def simple_test(self, img, img_metas, **kwargs):
        """Test without augmentation.

        Returns:
            list: One result per image carrying detection boxes/labels, the
            predicted data-point count, and any metadata-head outputs.
        """
        x = self.extract_feat(img)
        proposal_list = self.rpn_head.simple_test_rpn(x, img_metas)
        det_bboxes, det_labels = self.roi_head.simple_test_bboxes(
            x, img_metas, proposal_list, self.test_cfg.rcnn, **kwargs)

        # Same global feature + predicted-count augmentation as in training.
        global_feat = x[-1].mean(dim=[2, 3])
        pred_counts = self.data_points_count_head(global_feat).squeeze(-1)
        normalized_counts = torch.sigmoid(pred_counts / 100.0)
        enhanced_global_feat = torch.cat(
            [global_feat, normalized_counts.unsqueeze(-1)], dim=-1)

        results = []
        for i, (bboxes, labels) in enumerate(zip(det_bboxes, det_labels)):
            # Fix: DetDataSample was referenced without an import anywhere
            # in this file (NameError at inference time); it is imported at
            # module level from mmdet.structures.
            result = DetDataSample()
            result.bboxes = bboxes
            result.labels = labels
            result.predicted_data_points = pred_counts[i].item()
            # Per-image metadata predictions from the enhanced feature row.
            if hasattr(self, 'chart_cls_head'):
                result.chart_type = self.chart_cls_head(
                    enhanced_global_feat[i:i + 1])
            if hasattr(self, 'plot_reg_head'):
                result.plot_bb = self.plot_reg_head(
                    enhanced_global_feat[i:i + 1])
            if hasattr(self, 'axes_info_head'):
                result.axes_info = self.axes_info_head(
                    enhanced_global_feat[i:i + 1])
            if hasattr(self, 'data_series_head'):
                result.data_series = self.data_series_head(
                    enhanced_global_feat[i:i + 1])
            results.append(result)
        return results