# -*- coding: utf-8 -*-
# Author: Xue Yang <yangxue-2019-sjtu@sjtu.edu.cn>, <yangxue0827@126.com>
# License: Apache-2.0 license
# Copyright (c) SJTU. ALL Rights Reserved.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
def _rbox_to_center_and_matrix(boxes):
    r"""
    Split rotated boxes :math:`(x,y,w,h,\theta)` into centre columns and a 2x2 matrix per box.

    The matrix is :math:`\mathbf R\,\mathrm{diag}(w/2, h/2)\,\mathbf R^{T}` written out
    element-wise, where :math:`\mathbf R` is the rotation by :math:`\theta`.

    :param boxes: :math:`(x,y,w,h,\theta)`, [-1, 5]
    :return: ``(x, y, matrix)`` with ``x`` and ``y`` of shape [-1, 1] and ``matrix`` of shape [-1, 2, 2]
    """
    x, y, w, h, theta = [tf.reshape(column, [-1, 1]) for column in tf.unstack(boxes, axis=1)]

    # theta goes straight into tf.cos / tf.sin, i.e. it is used as radians;
    # no degree-to-radian conversion is applied here.
    cos_t = tf.cos(theta)
    sin_t = tf.sin(theta)

    m_xx = w / 2 * cos_t ** 2 + h / 2 * sin_t ** 2
    # The matrix is symmetric, so one off-diagonal value fills both slots.
    m_xy = w / 2 * sin_t * cos_t - h / 2 * sin_t * cos_t
    m_yy = w / 2 * sin_t ** 2 + h / 2 * cos_t ** 2

    matrix = tf.reshape(tf.concat([m_xx, m_xy, m_xy, m_yy], axis=-1), [-1, 2, 2])
    return x, y, matrix


def box2gaussian(boxes1, boxes2):
    r"""
    Convert two sets of boxes :math:`(x,y,w,h,\theta)` to the parameters of Gaussian distributions.

    Note that the returned 2x2 matrices are :math:`\mathbf R\,\mathrm{diag}(w/2, h/2)\,\mathbf R^{T}`.
    The distance functions in this file multiply them by themselves before use, i.e. they
    treat them as :math:`\mathbf \Sigma^{1/2}` rather than :math:`\mathbf \Sigma`.

    :param boxes1: :math:`(x_{1},y_{1},w_{1},h_{1},\theta_{1})`, [-1, 5]
    :param boxes2: :math:`(x_{2},y_{2},w_{2},h_{2},\theta_{2})`, [-1, 5]
    :return: ``(x1, y1, x2, y2, sigma1, sigma2)``; the centre coordinates have shape [-1, 1]
        and the matrices have shape [-1, 2, 2]
    """
    x1, y1, sigma1 = _rbox_to_center_and_matrix(boxes1)
    x2, y2, sigma2 = _rbox_to_center_and_matrix(boxes2)
    return x1, y1, x2, y2, sigma1, sigma2
def qbox2gaussian(boxes, num_pts):
    """
    Fit a Gaussian (mean and covariance) to the vertices of each polygon box.

    The covariance is the average of the outer products of the vertex offsets
    from the mean. Previously only the first vertex contributed an outer product;
    the remaining vertices contributed their squared norm (a 1x1 inner product)
    broadcast onto all four matrix entries.

    :param boxes: flattened vertices (x_1, y_1, ..., x_n, y_n); assumed to be [-1, 2 * num_pts]
    :param num_pts: number of vertices per box (Python int)
    :return: ``(mu, sigma)`` with ``mu`` of shape [-1, 1, 2] and ``sigma`` of shape [-1, 2, 2]
    """
    # Even columns hold x coordinates, odd columns hold y coordinates.
    x = tf.reshape(tf.reduce_mean(boxes[:, ::2], axis=1), [-1, 1])
    y = tf.reshape(tf.reduce_mean(boxes[:, 1::2], axis=1), [-1, 1])
    mu = tf.reshape(tf.concat([x, y], axis=-1), [-1, 1, 2])

    # [-1, num_pts, 2] vertex offsets; mu broadcasts over the vertex axis.
    offsets = tf.reshape(boxes[:, :2 * num_pts], [-1, num_pts, 2]) - mu

    # offsets^T @ offsets sums the per-vertex outer products in one batched matmul.
    sigma = tf.linalg.matmul(offsets, offsets, transpose_a=True) / num_pts
    return mu, sigma
def wasserstein_distance_item2(sigma1, sigma2):
    r"""
    Calculate the second term of wasserstein distance: :math:`\mathbf Tr(\mathbf \Sigma_{1} + \mathbf \Sigma_{2} - 2(\mathbf \Sigma_{1}^{1/2}\mathbf \Sigma_{2}\mathbf \Sigma_{1}^{1/2})^{1/2})`

    Each argument is multiplied by itself to obtain the :math:`\mathbf \Sigma` terms of the
    formula, so the arguments play the role of :math:`\mathbf \Sigma^{1/2}`.

    :param sigma1: matrix of the first Gaussian distribution, shape: [-1, 2, 2]
    :param sigma2: matrix of the second Gaussian distribution, shape: [-1, 2, 2]
    :return: the second term of wasserstein distance, one value per matrix pair
    """
    sigma1_sq = tf.linalg.matmul(sigma1, sigma1)
    sigma2_sq = tf.linalg.matmul(sigma2, sigma2)

    # sigma1 @ (sigma2 @ sigma2) @ sigma1, then its matrix square root
    sandwich = tf.linalg.matmul(tf.linalg.matmul(sigma1, sigma2_sq), sigma1)
    sandwich_root = tf.linalg.sqrtm(sandwich)

    return tf.linalg.trace(sigma1_sq + sigma2_sq - 2 * sandwich_root)
def gaussian_wasserstein_distance(boxes1, boxes2):
    r"""
    Calculate the wasserstein distance between boxes1 and boxes2: :math:`\mathbf D_{w} = ||\mathbf \mu_{1} - \mathbf \mu_{2}||^{2}_{2} + \mathbf Tr(\mathbf \Sigma_{1} + \mathbf \Sigma_{2} - 2(\mathbf \Sigma_{1}^{1/2}\mathbf \Sigma_{2}\mathbf \Sigma_{1}^{1/2})^{1/2})`

    :param boxes1: :math:`(x_{1},y_{1},w_{1},h_{1},\theta_{1})`, shape: [-1, 5]
    :param boxes2: :math:`(x_{2},y_{2},w_{2},h_{2},\theta_{2})`, shape: [-1, 5]
    :return: wasserstein distance, :math:`\mathbf D_{w}`, shape: [-1, 1]
    """
    x1, y1, x2, y2, sigma1, sigma2 = box2gaussian(boxes1, boxes2)

    # First term: squared Euclidean distance between the box centres, [-1, 1]
    center_term = (x1 - x2) ** 2 + (y1 - y2) ** 2
    # Second term: trace term on the 2x2 matrices, reshaped to [-1, 1] to match
    matrix_term = tf.reshape(wasserstein_distance_item2(sigma1, sigma2), [-1, 1])

    return center_term + matrix_term
def kullback_leibler_divergence(mu1, mu2, mu1_T, mu2_T, sigma1, sigma2):
    r"""
    Calculate the kullback-leibler divergence between two Gaussian distributions: :math:`\mathbf D_{kl} = 0.5*((\mathbf \mu_{2}-\mathbf \mu_{1})^T \mathbf \Sigma_{2}^{-1}(\mathbf \mu_{2}-\mathbf \mu_{1}) + \mathbf Tr(\mathbf \Sigma_{2}^{-1} \mathbf \Sigma_{1}) + \ln (|\mathbf \Sigma_{2}|/|\mathbf \Sigma_{1}|)) - 1`

    ``sigma1`` and ``sigma2`` are multiplied by themselves to obtain
    :math:`\mathbf \Sigma_{1}` and :math:`\mathbf \Sigma_{2}`, so they play the role of
    :math:`\mathbf \Sigma^{1/2}`.

    :param mu1: mean :math:`(\mu_{1})` of the Gaussian distribution, shape: [-1, 1, 2]
    :param mu2: mean :math:`(\mu_{2})` of the Gaussian distribution, shape: [-1, 1, 2]
    :param mu1_T: transposition of :math:`(\mu_{1})`, shape: [-1, 2, 1]
    :param mu2_T: transposition of :math:`(\mu_{2})`, shape: [-1, 2, 1]
    :param sigma1: matrix of the first Gaussian distribution, shape: [-1, 2, 2]
    :param sigma2: matrix of the second Gaussian distribution, shape: [-1, 2, 2]
    :return: kullback-leibler divergence, :math:`\mathbf D_{kl}`, shape: [-1]
    """
    sigma1_square = tf.linalg.matmul(sigma1, sigma1)
    sigma2_square = tf.linalg.matmul(sigma2, sigma2)
    # The inverse is needed by both the trace term and the quadratic form; build it once.
    sigma2_square_inv = tf.linalg.inv(sigma2_square)

    # Tr(Sigma2^-1 Sigma1)
    trace_term = tf.linalg.trace(tf.linalg.matmul(sigma2_square_inv, sigma1_square))
    # (mu2 - mu1) Sigma2^-1 (mu2 - mu1)^T, a [-1, 1, 1] tensor
    quadratic_term = tf.linalg.matmul(tf.linalg.matmul(mu2 - mu1, sigma2_square_inv), mu2_T - mu1_T)
    # ln(|Sigma2| / |Sigma1|); tf.math.log is used because the top-level tf.log alias
    # exists only in TensorFlow 1.x.
    log_det_term = tf.math.log(tf.linalg.det(sigma2_square) / tf.linalg.det(sigma1_square))

    trace_term = tf.reshape(trace_term, [-1, ])
    quadratic_term = tf.reshape(quadratic_term, [-1, ])
    log_det_term = tf.reshape(log_det_term, [-1, ])

    # The constant 2 is the dimensionality of the 2-D Gaussians.
    return (trace_term + quadratic_term + log_det_term - 2) / 2.
def gaussian_kullback_leibler_divergence(boxes1, boxes2):
    r"""
    Calculate the kullback-leibler divergence between boxes1 and boxes2

    :param boxes1: :math:`(x_{1},y_{1},w_{1},h_{1},\theta_{1})`, shape: [-1, 5]
    :param boxes2: :math:`(x_{2},y_{2},w_{2},h_{2},\theta_{2})`, shape: [-1, 5]
    :return: kullback-leibler divergence, :math:`\mathbf D_{kl}`, shape: [-1, 1]
    """
    x1, y1, x2, y2, sigma1, sigma2 = box2gaussian(boxes1, boxes2)

    # [-1, 2] centre coordinates of each box set
    center1 = tf.concat([x1, y1], axis=-1)
    center2 = tf.concat([x2, y2], axis=-1)

    # Row-vector ([-1, 1, 2]) and column-vector ([-1, 2, 1]) views of the same centres
    mu1 = tf.reshape(center1, [-1, 1, 2])
    mu2 = tf.reshape(center2, [-1, 1, 2])
    mu1_T = tf.reshape(center1, [-1, 2, 1])
    mu2_T = tf.reshape(center2, [-1, 2, 1])

    return tf.reshape(kullback_leibler_divergence(mu1, mu2, mu1_T, mu2_T, sigma1, sigma2), [-1, 1])
if __name__ == '__main__':
    # Small smoke demo: build the Gaussian parameters for one rotated box and one
    # quadrilateral, evaluate them in a TF1 session and print the results.
    rbox = np.array([[0, 0, 40, 20, -1 * np.pi / 180]], dtype=np.float64)

    # NOTE: qbox could instead be derived from rbox with
    # alpharotate.libs.utils.coordinate_convert.forward_convert(rbox, False);
    # it is hard-coded here so the demo has no extra dependency.
    # The dtype is set explicitly: an integer array would become an integer tensor,
    # for which tf.reduce_mean uses integer division and TF 1.x batched matmul does
    # not list int64 as a supported type.
    qbox = np.array([[-10, 20, -10, -20, 10, -20, 10, 20]], dtype=np.float64)

    rgaussian = box2gaussian(tf.convert_to_tensor(rbox), tf.convert_to_tensor(rbox))
    qgaussian = qbox2gaussian(tf.convert_to_tensor(qbox), 4)

    with tf.Session() as sess:
        out1, out2 = sess.run([rgaussian, qgaussian])
        print(out1)
        print(out2)