Source code for alpharotate.utils.gaussian_metric

# -*- coding: utf-8 -*-

# Author: Xue Yang <yangxue-2019-sjtu@sjtu.edu.cn>, <yangxue0827@126.com>
# License: Apache-2.0 license
# Copyright (c) SJTU. ALL Rights Reserved.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import numpy as np


def box2gaussian(boxes1, boxes2):
    r"""
    Convert two batches of rotated boxes :math:`(x,y,w,h,\theta)` to Gaussian parameters.

    For every box the centre :math:`(x, y)` is returned as the mean and a 2x2 matrix
    :math:`\mathbf R \, diag(w/2, h/2) \, \mathbf R^{T}` (with :math:`\mathbf R` the rotation by
    :math:`\theta`) is returned as ``sigma``. Note that this matrix is the *square root* of the
    covariance :math:`\mathbf \Sigma = \mathbf R \, diag(w^{2}/4, h^{2}/4) \, \mathbf R^{T}`;
    the metric functions in this module multiply it by itself before using it.

    :param boxes1: :math:`(x_{1},y_{1},w_{1},h_{1},\theta_{1})`, [-1, 5]
    :param boxes2: :math:`(x_{2},y_{2},w_{2},h_{2},\theta_{2})`, [-1, 5]
    :return: ``x1, y1, x2, y2`` each of shape [-1, 1] and ``sigma1, sigma2`` each of shape [-1, 2, 2]
    """

    def _single_batch(boxes):
        # Split the five columns and turn each into a [-1, 1] column tensor.
        cx, cy, w, h, theta = [tf.reshape(col, [-1, 1]) for col in tf.unstack(boxes, axis=1)]

        # The angle is fed to sin/cos unchanged, i.e. it is interpreted as radians
        # (no degree-to-radian conversion is applied here).
        sin_t = tf.sin(theta)
        cos_t = tf.cos(theta)
        half_w = w / 2
        half_h = h / 2

        top_left = half_w * cos_t ** 2 + half_h * sin_t ** 2
        # The matrix is symmetric, so both off-diagonal entries share one expression.
        off_diag = half_w * sin_t * cos_t - half_h * sin_t * cos_t
        bottom_right = half_w * sin_t ** 2 + half_h * cos_t ** 2

        flat = tf.concat([top_left, off_diag, off_diag, bottom_right], axis=-1)
        return cx, cy, tf.reshape(flat, [-1, 2, 2])

    x1, y1, sigma1 = _single_batch(boxes1)
    x2, y2, sigma2 = _single_batch(boxes2)
    return x1, y1, x2, y2, sigma1, sigma2
def qbox2gaussian(boxes, num_pts):
    """
    Convert point-set boxes to a Gaussian (mean and covariance of the points).

    Each row of ``boxes`` is read as interleaved coordinates
    ``(x_1, y_1, x_2, y_2, ...)``: even columns are x, odd columns are y.

    :param boxes: interleaved point coordinates, shape: [-1, 2 * n]
    :param num_pts: number of leading points used for the covariance (Python int)
    :return: ``mu`` of shape [-1, 1, 2] and ``sigma`` of shape [-1, 2, 2]
    """
    # The mean is taken over every x / y column present in `boxes`.
    x = tf.reshape(tf.reduce_mean(boxes[:, ::2], axis=1), [-1, 1])
    y = tf.reshape(tf.reduce_mean(boxes[:, 1::2], axis=1), [-1, 1])
    mu = tf.reshape(tf.concat([x, y], axis=-1), [-1, 1, 2])

    # Centre the first `num_pts` points: [-1, num_pts, 2] - [-1, 1, 2].
    points = tf.reshape(boxes[:, :2 * num_pts], [-1, num_pts, 2])
    centered = points - mu

    # centered^T @ centered is the sum over points of the 2x2 outer products
    # (p - mu)^T (p - mu). The previous loop used (p - mu)(p - mu)^T for all
    # points but the first, which is a 1x1 inner product that was broadcast
    # onto every entry of sigma.
    sigma = tf.linalg.matmul(centered, centered, transpose_a=True)
    sigma /= num_pts
    return mu, sigma
def wasserstein_distance_item2(sigma1, sigma2):
    r"""
    Calculate the second term of wasserstein distance:
    :math:`\mathbf Tr(\mathbf \Sigma_{1} + \mathbf \Sigma_{2} - 2(\mathbf \Sigma_{1}^{1/2}\mathbf \Sigma_{2}\mathbf \Sigma_{1}^{1/2})^{1/2})`

    The arguments are used as :math:`\mathbf \Sigma^{1/2}`: each one is multiplied by itself
    to obtain the covariance that appears in the formula above.

    :param sigma1: square-root covariance :math:`(\Sigma_{1}^{1/2})` of the first Gaussian, shape: [-1, 2, 2]
    :param sigma2: square-root covariance :math:`(\Sigma_{2}^{1/2})` of the second Gaussian, shape: [-1, 2, 2]
    :return: the second term of wasserstein distance, one value per matrix pair
    """
    cov1 = tf.linalg.matmul(sigma1, sigma1)
    cov2 = tf.linalg.matmul(sigma2, sigma2)

    # Sigma1^{1/2} Sigma2 Sigma1^{1/2}, whose matrix square root is the cross term.
    sandwiched = tf.linalg.matmul(tf.linalg.matmul(sigma1, cov2), sigma1)

    return tf.linalg.trace(cov1 + cov2 - 2 * tf.linalg.sqrtm(sandwiched))
def gaussian_wasserstein_distance(boxes1, boxes2):
    r"""
    Calculate the wasserstein distance between boxes1 and boxes2:
    :math:`\mathbf D_{w} = ||\mathbf \mu_{1} - \mathbf \mu_{2}||^{2}_{2} + \mathbf Tr(\mathbf \Sigma_{1} + \mathbf \Sigma_{2} - 2(\mathbf \Sigma_{1}^{1/2}\mathbf \Sigma_{2}\mathbf \Sigma_{1}^{1/2})^{1/2})`

    Boxes are paired row by row (row i of ``boxes1`` against row i of ``boxes2``).

    :param boxes1: :math:`(x_{1},y_{1},w_{1},h_{1},\theta_{1})`, shape: [-1, 5]
    :param boxes2: :math:`(x_{2},y_{2},w_{2},h_{2},\theta_{2})`, shape: [-1, 5]
    :return: wasserstein distance, :math:`\mathbf D_{w}`, shape: [-1, 1]
    """
    cx1, cy1, cx2, cy2, mat1, mat2 = box2gaussian(boxes1, boxes2)

    # First term: squared Euclidean distance between the two centres.
    center_term = (cx1 - cx2) ** 2 + (cy1 - cy2) ** 2

    # Second term: trace expression on the matrices, reshaped to a column
    # so it lines up with the [-1, 1] centre term.
    shape_term = tf.reshape(wasserstein_distance_item2(mat1, mat2), [-1, 1])

    return center_term + shape_term
def kullback_leibler_divergence(mu1, mu2, mu1_T, mu2_T, sigma1, sigma2):
    r"""
    Calculate the kullback-leibler divergence between two 2-D Gaussian distributions:
    :math:`\mathbf D_{kl} = \frac{1}{2}\left(\mathbf Tr(\mathbf \Sigma_{2}^{-1} \mathbf \Sigma_{1}) + (\mathbf \mu_{2}-\mathbf \mu_{1})^T \mathbf \Sigma_{2}^{-1}(\mathbf \mu_{2}-\mathbf \mu_{1}) + \ln \frac{|\mathbf \Sigma_{2}|}{|\mathbf \Sigma_{1}|} - 2\right)`

    ``sigma1`` and ``sigma2`` are used as :math:`\mathbf \Sigma^{1/2}`: each is multiplied by
    itself to obtain the covariance that appears in the formula above.

    :param mu1: mean :math:`(\mu_{1})` of the Gaussian distribution, shape: [-1, 1, 2]
    :param mu2: mean :math:`(\mu_{2})` of the Gaussian distribution, shape: [-1, 1, 2]
    :param mu1_T: transposition of :math:`(\mu_{1})`, shape: [-1, 2, 1]
    :param mu2_T: transposition of :math:`(\mu_{2})`, shape: [-1, 2, 1]
    :param sigma1: square-root covariance :math:`(\Sigma_{1}^{1/2})` of the first Gaussian, shape: [-1, 2, 2]
    :param sigma2: square-root covariance :math:`(\Sigma_{2}^{1/2})` of the second Gaussian, shape: [-1, 2, 2]
    :return: kullback-leibler divergence, :math:`\mathbf D_{kl}`, shape: [-1]
    """
    cov1 = tf.linalg.matmul(sigma1, sigma1)
    cov2 = tf.linalg.matmul(sigma2, sigma2)
    cov2_inv = tf.linalg.inv(cov2)

    # Tr(Sigma2^{-1} Sigma1)
    trace_term = tf.linalg.trace(tf.linalg.matmul(cov2_inv, cov1))

    # (mu2 - mu1) Sigma2^{-1} (mu2 - mu1)^T, a [-1, 1, 1] tensor before flattening.
    row_diff = mu2 - mu1
    col_diff = mu2_T - mu1_T
    mahalanobis_term = tf.linalg.matmul(tf.linalg.matmul(row_diff, cov2_inv), col_diff)

    # ln(|Sigma2| / |Sigma1|)
    log_det_term = tf.log(tf.linalg.det(cov2) / tf.linalg.det(cov1))

    # Flatten every term to one value per pair before combining them.
    trace_term, mahalanobis_term, log_det_term = [
        tf.reshape(term, [-1, ]) for term in (trace_term, mahalanobis_term, log_det_term)
    ]

    return (trace_term + mahalanobis_term + log_det_term - 2) / 2.
def gaussian_kullback_leibler_divergence(boxes1, boxes2):
    r"""
    Calculate the kullback-leibler divergence between boxes1 and boxes2

    Boxes are paired row by row (row i of ``boxes1`` against row i of ``boxes2``).

    :param boxes1: :math:`(x_{1},y_{1},w_{1},h_{1},\theta_{1})`, shape: [-1, 5]
    :param boxes2: :math:`(x_{2},y_{2},w_{2},h_{2},\theta_{2})`, shape: [-1, 5]
    :return: kullback-leibler divergence, :math:`\mathbf D_{kl}`, shape: [-1, 1]
    """
    x1, y1, x2, y2, sigma1, sigma2 = box2gaussian(boxes1, boxes2)

    # [-1, 2] centre coordinates of each batch.
    center1 = tf.concat([x1, y1], axis=-1)
    center2 = tf.concat([x2, y2], axis=-1)

    # The same two values per box are viewed once as a row vector and once as
    # a column vector, which is what kullback_leibler_divergence expects.
    as_row = [-1, 1, 2]
    as_col = [-1, 2, 1]
    divergence = kullback_leibler_divergence(
        tf.reshape(center1, as_row),
        tf.reshape(center2, as_row),
        tf.reshape(center1, as_col),
        tf.reshape(center2, as_col),
        sigma1,
        sigma2,
    )
    return tf.reshape(divergence, [-1, 1])
if __name__ == '__main__':
    # Small manual check: build the Gaussian parameters for one rotated box and
    # one 4-point box, evaluate both in a TF1 session and print the results.
    # (The 4-point box is hard-coded; it could instead be derived from `rbox` with
    # alpharotate.libs.utils.coordinate_convert.forward_convert(rbox, False).)
    rbox = np.array([[0, 0, 40, 20, -1 * np.pi / 180]])
    qbox = np.array([[-10, 20, -10, -20, 10, -20, 10, 20]])

    rbox_tensor = tf.convert_to_tensor(rbox)
    rgaussian = box2gaussian(rbox_tensor, tf.convert_to_tensor(rbox))
    qgaussian = qbox2gaussian(tf.convert_to_tensor(qbox), 4)

    with tf.Session() as sess:
        for result in sess.run([rgaussian, qgaussian]):
            print(result)