o
    ®Ÿ?eÐ¥  ã                   @   sÂ  d Z ddlZddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
 ddl	mZ ddl	mZ dd	l	mZ dd
l	mZ e d¡dd„ ƒZe d¡dd„ ƒZe d¡dd„ ƒZe d¡dd„ ƒZe d¡dd„ ƒZe d¡dd„ ƒZe d¡dd„ ƒZe d ¡d!d"„ ƒZe d#¡d$d%„ ƒZe d&¡d'd(„ ƒZe d)¡d*d+„ ƒZe d,¡d-d.„ ƒZe d/¡d0d1„ ƒZe d2¡d3d4„ ƒZe d5¡d6d7„ ƒZe d8¡d9d:„ ƒZe d;¡d<d=„ ƒZ e d>¡d?d@„ ƒZ!e dA¡dBdC„ ƒZ"e dD¡dEdF„ ƒZ#e dG¡dHdI„ ƒZ$e dJ¡dKdL„ ƒZ%e dM¡dNdO„ ƒZ&e dP¡dQdR„ ƒZ'e dS¡dTdU„ ƒZ(e dV¡dWdX„ ƒZ)e dY¡dZd[„ ƒZ*e d\¡d]d^„ ƒZ+e d_¡d`da„ ƒZ,e db¡dcdd„ ƒZ-dedf„ Z.e dg¡dhdi„ ƒZ/e dj¡dkdl„ ƒZ0e dm¡dndo„ ƒZ1e dp¡dqdr„ ƒZ2e ds¡dtdu„ ƒZ3e dv¡dwdx„ ƒZ4e dy¡dzd{„ ƒZ5e d|¡d}d~„ ƒZ6e d¡d€d„ ƒZ7e d‚¡dƒd„„ ƒZ8e d…¡d†d‡„ ƒZ9e dˆ¡d‰dŠ„ ƒZ:e d‹¡dŒd„ ƒZ;e dŽ¡dd„ ƒZ<e d‘¡d’d“„ ƒZ=e d”¡d•d–„ ƒZ>e d—¡d˜d™„ ƒZ?dšd›„ Z@e dœ¡ddž„ ƒZAe dŸ¡d d¡„ ƒZBe d¢¡d£d¤„ ƒZC	¥dÃd¦d§„ZDe d¨¡d©dª„ ƒZEe d«¡d¬d­„ ƒZFe d®¡d¯d°„ ƒZGe d±¡d²d³„ ƒZHe d´¡e dµ¡d¶d·„ ƒƒZIe d¸¡d¹dº„ ƒZJe d»¡d¼d½„ ƒZKd¾d¿„ ZLe dÀ¡dÁdÂ„ ƒZMdS )Äz-Gradients for operators defined in nn_ops.py.é    N)Úbackprop)Údtypes)Úops)Ú	array_ops)Úarray_ops_stack)Ú
gen_nn_ops)Úmath_ops)Únn_opsZConv2DBackpropInputc                 C   s    dt j|t | jd ¡| jd |  d¡|  d¡|  d¡|  d¡|  d¡|  d	¡ ¡ d
	t j|| jd |  d¡|  d¡|  d¡|  d¡|  d¡|  d	¡ ¡ d
gS )úËThe derivatives for deconvolution.

  Args:
    op: the Deconvolution op.
    grad: the tensor representing the gradient w.r.t. the output

  Returns:
    the gradients w.r.t. the input and the filter
  Né   é   Ú	dilationsÚstridesÚpaddingÚexplicit_paddingsÚuse_cudnn_on_gpuÚdata_format©r   r   r   r   r   r   )r   Úconv2d_backprop_filterr   ÚshapeÚinputsÚget_attrÚdecodeÚconv2d©ÚopÚgrad© r   ú^/home/www/facesmatcher.com/pyenv/lib/python3.10/site-packages/tensorflow/python/ops/nn_grad.pyÚ_Conv2DBackpropInputGrad   s.   ÷
øôr   ZConv2DBackpropFilterc                 C   s    t jt | jd ¡|| jd |  d¡|  d¡|  d¡|  d¡|  d¡|  d¡ ¡ d		d t j| jd ||  d¡|  d¡|  d¡|  d¡|  d¡|  d¡ ¡ d	gS )
Nr   r   r   r   r   r   r   r   r   )r   Úconv2d_backprop_inputr   r   r   r   r   r   r   r   r   r   Ú_Conv2DBackpropFilterGradD   s.   ÷	øõr!   Z"DepthwiseConv2dNativeBackpropInputc                 C   sˆ   dt j|t | jd ¡| jd |  d¡|  d¡|  d¡|  d¡|  d¡d	t j|| jd |  d¡|  d¡|  d¡|  d¡|  d¡d	gS )
r
   Nr   r   r   r   r   r   r   ©r   r   r   r   r   )r   Ú'depthwise_conv2d_native_backprop_filterr   r   r   r   Údepthwise_conv2d_nativer   r   r   r   Ú'_DepthwiseConv2dNativeBackpropInputGrad_   s*   ø	ùõr%   Z#DepthwiseConv2dNativeBackpropFilterc                 C   sˆ   t jt | jd ¡|| jd |  d¡|  d¡|  d¡|  d¡|  d¡dd t j| jd ||  d¡|  d¡|  d¡|  d¡|  d¡dgS )	Nr   r   r   r   r   r   r   r"   )r   Ú&depthwise_conv2d_native_backprop_inputr   r   r   r   r$   r   r   r   r   Ú(_DepthwiseConv2dNativeBackpropFilterGrad€   s*   øùör'   ZConv3Dc              
   C   sŒ   |   d¡ ¡ }t | jd | jd g¡\}}tj|| jd ||   d¡|   d¡|   d¡|dtj| jd |||   d¡|   d¡|   d¡|dgS )Nr   r   r   r   r   r   ©r   r   r   r   )r   r   r   Úshape_nr   r	   Úconv3d_backprop_input_v2Úconv3d_backprop_filter_v2)r   r   r   Úshape_0Úshape_1r   r   r   Ú_Conv3DGrad—   s*   ùù÷r.   ZConv3DBackpropInputV2c              
   C   sz   |   d¡ ¡ }d tj|t | jd ¡| jd |   d¡|   d¡|   d¡|dtj|| jd |   d¡|   d¡|   d¡|dgS )Nr   r   r   r   r   r   r(   )r   r   r	   r+   r   r   r   Úconv3d©r   r   r   r   r   r   Ú_Conv3DBackpropInputGrad¯   s(   ùúör1   ZConv3DBackpropFilterV2c              
   C   sz   |   d¡ ¡ }tjt | jd ¡|| jd |   d¡|   d¡|   d¡|dd tj| jd ||   d¡|   d¡|   d¡|dgS )Nr   r   r   r   r   r   r(   )r   r   r	   r*   r   r   r   r/   r0   r   r   r   Ú_Conv3DBackpropFilterGradÆ   s(   ùú÷r2   Z	AvgPool3Dc              	   C   s>   t jt | jd ¡||  d¡|  d¡|  d¡|  d¡ ¡ dS ©Nr   Úksizer   r   r   )r4   r   r   r   )r   Zavg_pool3d_gradr   r   r   r   r   r   r   r   r   Ú_AvgPool3DGradÜ   s   úr5   ZAvgPool3DGradc              	   C   s@   t  | jd ¡tj||  d¡|  d¡|  d¡|  d¡ ¡ dfS ©Nr   r4   r   r   r   )r   )r   Ústop_gradientr   r   Z
avg_pool3dr   r   r   r   r   r   Ú_AvgPool3DGradGradç   s   ûÿr8   Z	MaxPool3Dc              
   C   s@   t j| jd | jd ||  d¡|  d¡|  d¡|  d¡ ¡ dS r3   )r   Úmax_pool3d_gradr   Úoutputsr   r   r   r   r   r   Ú_MaxPool3DGradò   s   ùr;   ZMaxPool3DGradc                 C   ó^   t  | jd ¡t  | jd ¡tj| jd | jd ||  d¡|  d¡|  d¡|  d¡ ¡ dfS ©Nr   r   r4   r   r   r   ©r   r   )r   Ú
zeros_liker   r   Zmax_pool3d_grad_gradr   r   r   r   r   r   Ú_MaxPool3DGradGradþ   ó   ùþr@   ZMaxPool3DGradGradc                 C   r<   r=   )r   r?   r   r   r9   r   r   r   r   r   r   Ú_MaxPool3DGradGradGrad  rA   rB   ZSoftmaxc                 C   s*   | j d }tj|| ddd}|| | S )a  The derivative of the softmax nonlinearity.

  We assume that probs is of shape [batch_size * dim]
  The formula for dsoftmax / dx = (diag(softmax) - softmax * softmax').
  This matrix is diagonal minus a rank one matrix, so it is easy to implement
  as follows:

    grad_x = grad_softmax * softmax - sum(grad_softmax * softmax) * softmax

  Args:
     op: the Softmax op.
     grad_softmax:  the tensor representing the gradient w.r.t. the softmax
       output.

  Returns:
     gradient w.r.t the input to the softmax

  r   éÿÿÿÿT©Úkeepdims)r:   r   Ú
reduce_sum)r   Zgrad_softmaxÚsoftmaxZsum_channelsr   r   r   Ú_SoftmaxGrad  s   
rH   Z
LogSoftmaxc                 C   s(   t  | jd ¡}|t j|ddd|  S )a  The gradient for log_softmax.

      log_softmax = input - log(sum(exp(input))
      dlog_softmax/dinput = diag - softmax(input)

  Args:
    op: The log softmax op.
    grad: The tensor representing the gradient w.r.t. the output.

  Returns:
    The gradients w.r.t. the input.
  r   rC   TrD   )r   Úexpr:   rF   )r   r   rG   r   r   r   Ú_LogSoftmaxGrad3  s   rJ   ZBiasAddc                 C   s8   z|   d¡}W n ty   d}Y nw |tj||dfS )a§  Return the gradients for the 2 inputs of bias_op.

  The first input of unused_bias_op is the tensor t, and its gradient is
  just the gradient the unused_bias_op received.

  The second input of unused_bias_op is the bias vector which has one fewer
  dimension than "received_grad" (the batch dimension.)  Its gradient is the
  received gradient Summed on the batch dimension, which is the first dimension.

  Args:
    op: The BiasOp for which we need to generate gradients.
    received_grad: Tensor.  The gradients passed to the BiasOp.

  Returns:
    Two tensors, the first one for the "tensor" input of the BiasOp,
    the second one for the "bias" input of the BiasOp.
  r   N)Zout_backpropr   )r   Ú
ValueErrorr   Zbias_add_grad)r   Úreceived_gradr   r   r   r   Ú_BiasAddGradE  s   ÿÿÿrM   ZBiasAddGradc              	   C   sì   z|   d¡}W n ty   d}Y nw t | jd ¡}t |¡}|dkrNt t |dd… ¡|t |dd… ¡gd¡}t |dd… dg|dd… gd¡}nt t |dd… ¡|gd¡}t |dd… dggd¡}t ||¡}t ||¡S )a(  Gradient for the BiasAddGrad op.

  Args:
    op: BiasAddGrad op for which we are calculating gradients.
    received_grad: The gradients passed to the BiasAddGrad op.

  Returns:
    A single gradient Tensor for the input to BiasAddGrad (which
    is the gradient of the bias term in BiasAdd)
  r   Nr   ó   NCHWr   r   rC   )	r   rK   r   r   r   ÚconcatZ	ones_likeÚreshapeZtile)r   rL   r   r   Z
bias_shapeZexpanded_shapeZ
tile_multsZexpanded_gradr   r   r   Ú_BiasAddGradGrada  s*   ÿ
þý&ÿrQ   Z	BiasAddV1c                 C   s$   t  t |¡d ¡}|t  ||¡fS )a³  Return the gradients for the 2 inputs of bias_op.

  The first input of unused_bias_op is the tensor t, and its gradient is
  just the gradient the unused_bias_op received.

  The second input of unused_bias_op is the bias vector which has one fewer
  dimension than "received_grad" (the batch dimension.)  Its gradient is the
  received gradient Summed on the batch dimension, which is the first dimension.

  Args:
    unused_bias_op: The BiasOp for which we need to generate gradients.
    received_grad: Tensor.  The gradients passed to the BiasOp.

  Returns:
    Two tensors, the first one for the "tensor" input of the BiasOp,
    the second one for the "bias" input of the BiasOp.
  r   )r   Úranger   ÚrankrF   )Zunused_bias_oprL   Zreduction_dim_tensorr   r   r   Ú_BiasAddGradV1…  s   ÿrT   ZReluc                 C   ó   t  || jd ¡S ©Nr   )r   Ú	relu_gradr:   r   r   r   r   Ú	_ReluGrad  ó   rX   ZEluGradc                 C   s8   | j d }t ||¡t |dk || j d  t |¡¡fS )Nr   r   )r   r   Úelu_gradr   Úwherer?   )r   r   Zelu_xr   r   r   Ú_EluGradGrad¢  ó   

ÿÿr\   ZSeluGradc                 C   s8   | j d }t ||¡t |dk || j d  t |¡¡fS )Nr   g        r   )r   r   Ú	selu_gradr   r[   r?   )r   r   Zselu_xr   r   r   Ú_SeluGradGradª  r]   r_   ZRelu6c                 C   rU   rV   )r   Ú
relu6_gradr:   r   r   r   r   Ú
_Relu6Grad²  rY   ra   Z	Relu6Gradc                 C   ó    | j d }t ||¡t |¡fS ©Nr   )r   r   r`   r   r?   ©r   r   Úxr   r   r   Ú_Relu6GradGrad·  ó   
rf   Z	LeakyReluc                 C   s$   | j d }|  d¡}tj|||dS )Nr   Úalpha©rh   )r   r   r   Úleaky_relu_grad©r   r   re   rh   r   r   r   Ú_LeakyReluGrad½  s   

rl   ZLeakyReluGradc                 C   s.   | j d }|  d¡}tj|||dt |¡fS )Nr   rh   ri   )r   r   r   rj   r   r?   rk   r   r   r   Ú_LeakyReluGradGradÄ  s   

ÿÿrm   ZEluc                 C   rU   rV   )r   rZ   r:   r   r   r   r   Ú_EluGradÌ  rY   rn   ZSeluc                 C   rU   rV   )r   r^   r:   r   r   r   r   Ú	_SeluGradÑ  rY   ro   ZSoftplusc                 C   s   |t  | jd ¡ S rV   )r   Zsigmoidr   r   r   r   r   Ú_SoftplusGradÖ  s   rp   ZSoftplusGradc                 C   sp   | j \}}t |g¡# t ||¡}|| t | ¡d t |¡  }||fW  d   ƒ S 1 s1w   Y  d S )Ng       @)r   r   Zcontrol_dependenciesr   Zsoftplus_gradr   rI   )r   r   Zdyre   ZddyZd2xr   r   r   Ú_SoftplusGradGradÛ  s   
"$ýrq   ZSoftsignc                 C   rU   rV   )r   Zsoftsign_gradr   r   r   r   r   Ú_SoftsignGradè  rY   rr   ZReluGradc                 C   rb   rc   )r   r   rW   r   r?   rd   r   r   r   Ú_ReluGradGradí  rg   rs   c                 C   s   t  | d¡} | | S )zãMultiply after broadcasting vec to match dimensions of mat.

  Args:
    vec: A 1-D tensor of dimension [D0]
    mat: A 2-D tensor of dimension [D0, D1]

  Returns:
    A tensor of dimension [D0, D1], the result of vec * mat
  rC   )r   Úexpand_dims)ZvecÚmatr   r   r   Ú_BroadcastMuló  s   rv   ZSoftmaxCrossEntropyWithLogitsc              
   C   s„   | j d }t||ƒ}| jd }|dur7t|ddƒs7t |¡}||tjt 	t 
|d¡t 
|d¡¡dd | 7 }|t|t |¡ ƒfS )z4Gradient function for SoftmaxCrossEntropyWithLogits.r   r   NÚ_is_zeros_tensorFr   ©Úaxis)r:   rv   r   Úgetattrr	   rG   r   Úsqueezer   Úmatmulrt   Zlog_softmax©r   Z	grad_lossZ	grad_gradZsoftmax_gradr   ZlogitsrG   r   r   r   Ú"_SoftmaxCrossEntropyWithLogitsGrad  s"   



ÿ


þüür~   Z#SparseSoftmaxCrossEntropyWithLogitsc              
   C   sv   | j d }t||ƒ}| jd }|dur7t|ddƒs7t |¡}||tjt 	t 
|d¡t 
|d¡¡dd | 7 }|dfS )z:Gradient function for SparseSoftmaxCrossEntropyWithLogits.r   r   Nrw   Fr   rx   )r:   rv   r   rz   r	   rG   r   r{   r   r|   rt   r}   r   r   r   Ú(_SparseSoftmaxCrossEntropyWithLogitsGrad  s"   
	


ÿ


þüür   ZConv2Dc           
      C   sž   |   d¡}|   d¡}|   d¡}|   d¡}|   d¡}|   d¡}t | jd | jd g¡\}}	tj|| jd |||||||d		tj| jd |	|||||||d		gS )
zGradient function for Conv2D.r   r   r   r   r   r   r   r   r   )r   r   r)   r   r   r    r   )
r   r   r   r   r   r   r   r   r,   r-   r   r   r   Ú_Conv2DGrad5  s<   





	÷
÷õr€   ZDepthwiseConv2dNativec                 C   s”   t jt | jd ¡| jd ||  d¡|  d¡|  d¡|  d¡|  d¡dt j| jd t | jd ¡||  d¡|  d¡|  d¡|  d¡|  d¡dgS )	Nr   r   r   r   r   r   r   r"   )r   r&   r   r   r   r   r#   r   r   r   r   Ú_DepthwiseConv2dNativeGrad^  s*   ø	øör   Z
Dilation2Dc                 C   sd   t  | jd | jd ||  d¡|  d¡|  d¡¡t  | jd | jd ||  d¡|  d¡|  d¡¡gS )Nr   r   r   Zratesr   )r	   Zdilation2d_backprop_inputr   r   Zdilation2d_backprop_filterr   r   r   r   Ú_Dilation2DGradv  s   ýýûr‚   ZLRNc              	   C   sL   |   d¡}|   d¡}|   d¡}|   d¡}t || jd | jd ||||¡gS )NÚdepth_radiusÚbiasrh   Úbetar   )r   r   Zlrn_gradr   r:   )r   r   rƒ   r„   rh   r…   r   r   r   Ú_LRNGrad„  s   



ÿÿr†   ZAvgPoolc              	   C   s:   t jt | jd ¡||  d¡|  d¡|  d¡|  d¡dS r6   )r   Zavg_pool_gradr   r   r   r   r   r   r   r   Ú_AvgPoolGrad  s   úr‡   ZAvgPoolGradc              	   C   s<   t  | jd ¡tj||  d¡|  d¡|  d¡|  d¡dfS r6   )r   r7   r   r   Zavg_poolr   r   r   r   r   Ú_AvgPoolGradGrad›  s   ûÿrˆ   ZMaxPoolc                 C   sD   t j| jd | jd ||  d¡|  d¡|  d¡|  d¡|  d¡dS )Nr   r4   r   r   r   r   )r   r   r   )r   Úmax_pool_gradr   r:   r   r   r   r   r   Ú_MaxPoolGrad¦  s   ørŠ   Z	MaxPoolV2c              
   C   sJ   | j d }| j d }tj| j d | jd ||||  d¡|  d¡dd d fS )Nr   r   r   r   r   r>   )r   r   Zmax_pool_grad_v2r:   r   ©r   r   r4   r   r   r   r   Ú_MaxPoolGradV2³  s   

ùùrŒ   ZMaxPoolWithArgmaxc              
   C   s>   ~t j| jd || jd |  d¡|  d¡|  d¡|  d¡dS )Nr   r   r4   r   r   Úinclude_batch_in_index)r   r   )r   Zmax_pool_grad_with_argmaxr   r:   r   )r   r   Zunused_argmax_gradr   r   r   Ú_MaxPoolGradWithArgmaxÁ  s   ùrŽ   ZMaxPoolGradc                 C   óZ   t  | jd ¡t  | jd ¡tj| jd | jd ||  d¡|  d¡|  d¡|  d¡dfS r=   )r   r?   r   r   Zmax_pool_grad_gradr   r   r   r   r   Ú_MaxPoolGradGradÎ  ó   ùþr   ZMaxPoolGradV2c                 C   sf   | j d }| j d }t | j d ¡t | j d ¡tj| j d | j d ||||  d¡|  d¡dd d fS )Né   é   r   r   r   r   r>   )r   r   r?   r   Zmax_pool_grad_grad_v2r   r‹   r   r   r   Ú_MaxPoolGradGradV2Ü  s   

ù÷r”   ZMaxPoolGradGradc                 C   r   r=   )r   r?   r   r   r‰   r   r   r   r   r   Ú_MaxPoolGradGradGradì  r‘   r•   ZFractionalMaxPoolc              
   C   s2   t  | jd | jd || jd | jd |  d¡¡S )a  Returns gradient for FractionalMaxPool.

  Since FractionalMaxPool has three outputs, there are three gradients passed in
  for each of the outputs. Only the first one is useful, the other two gradients
  are empty.

  Args:
    op: The FractionalMaxPoolOp.
    grad_0: Gradient with respect to op.outputs[0]
    unused_grad_1: Gradient with respect to op.outputs[1]/row_seq. It is empty.
    unused_grad_2: Gradient with respect to op.outputs[2]/col_seq. It is empty.

  Returns:
    Input backprop for FractionalMaxPool op.
  r   r   r   Úoverlapping)r   Zfractional_max_pool_gradr   r:   r   ©r   Zgrad_0Zunused_grad_1Zunused_grad_2r   r   r   Ú_FractionalMaxPoolGradú  s   "þr˜   ZFractionalAvgPoolc              	   C   s.   t  | jd  ¡ || jd | jd |  d¡¡S )a  Returns gradient for FractionalAvgPool.

  Since FractionalAvgPool has three outputs, there are three gradients passed in
  for each of the outputs. Only the first one is useful, the other two gradients
  are empty.

  Args:
    op: The FractionalAvgPoolOp.
    grad_0: Gradient with respect to op.outputs[0]
    unused_grad_1: Gradient with respect to op.outputs[1]/row_seq. It is empty.
    unused_grad_2: Gradient with respect to op.outputs[2]/col_seq. It is empty.

  Returns:
    Input backprop for FractionalAvgPool op.
  r   r   r   r–   )r   Zfractional_avg_pool_gradr   Ú	get_shaper:   r   r—   r   r   r   Ú_FractionalAvgPoolGrad  s   þrš   Z BatchNormWithGlobalNormalizationc                 C   sR   t  | jd | jd | jd | jd ||  d¡|  d¡¡\}}}}}|||||fS )a$  Return the gradients for the 5 inputs of BatchNormWithGlobalNormalization.

  We do not backprop anything for the mean and var intentionally as they are
  not being trained with backprop in the operation.

  Args:
    op: The BatchNormOp for which we need to generate gradients.
    grad: Tensor.  The gradients passed to the BatchNormOp.

  Returns:
    dx: Backprop for input, which is (grad * (g * rsqrt(v + epsilon)))
    dm: Backprop for mean, which is
        sum_over_rest(grad * g) * (-1 / rsqrt(v + epsilon))
    dv: Backprop for variance, which is
        sum_over_rest(grad * g * (x - m)) * (-1/2) * (v + epsilon) ^ (-3/2)
    db: Backprop for beta, which is grad reduced in all except the
        last dimension.
    dg: Backprop for gamma, which is (grad * ((x - m) * rsqrt(v + epsilon)))
  r   r   r   r“   Zvariance_epsilonZscale_after_normalization)r   Z)batch_norm_with_global_normalization_gradr   r   )r   r   ÚdxÚdmZdvÚdbÚdgr   r   r   Ú%_BatchNormWithGlobalNormalizationGrad&  s
   "þrŸ   c              	   G   s°  | j d }|d }| j d }|  d¡}|  d¡}|  d¡}|dkr%tj}	n|dkr-tj}	ntj}	|r[|||| jd | jd |||d	œ}
|dkrN| jd
 |
d< |	di |
¤Ž\}}}}}nv| j d }| j d }|dkrzt |g d¢¡}t |g d¢¡}n|dkrŽt |g d¢¡}t |g d¢¡}|dv r”dnd}||||||||d	œ}
|dkr¬| jd
 |
d< |	di |
¤Ž\}}}}}|dkrÅt |g d¢¡}n|dkrÑt |g d¢¡}|||ddfS )aã  Return the gradients for the 3 inputs of BatchNorm.

  Args:
    op: The BatchNormOp for which we need to compute gradients.
    version: Integer indicating which version to use of the fused batch
      norm gradient.
    *grad: An argument list for tensors of gradients wrt the outputs
      with grad[0] as grad_y.

  Returns:
    grad_x: gradient for x, which is scale * rsqrt(variance + epsilon) *
            [grad_y - mean(grad_y) - (x - mean(x)) *
            mean(grad_y * (x - mean(x))) / (variance + epsilon)]
            in training mode; grad_y * scale * rsqrt(pop_variance + epsilon)
            in freeze mode.

    grad_scale: gradient for scale, which is sum(grad_y * (x - mean(x)) *
                rsqrt(variance + epsilon)) in training mode;
                sum(grad_y * (x - pop_mean) * rsqrt(pop_variance + epsilon))
                in freeze mode.

    grad_offset: gradient for offset, which is sum(grad_y) in training mode;
                 sum(grad_y) in freeze mode.
  r   r   Úepsilonr   Úis_trainingr   r’   r“   )Z
y_backpropre   ÚscaleZreserve_space_1Zreserve_space_2r    r   r¡   é   Zreserve_space_3rN   )r   r   r’   r   ó   NCDHW)r   r   r’   r“   r   )rN   ó   NHWCZNHWCZNDHWC)r   r’   r   r   )r   r“   r   r   r’   Nr   )	r   r   r   Zfused_batch_norm_grad_v3Zfused_batch_norm_grad_v2Zfused_batch_norm_gradr:   r   Z	transpose)r   Úversionr   re   Úgrad_yr¢   r    r   r¡   Zgrad_funÚargsr›   ZdscaleZdoffsetÚ_Úpop_meanÚpop_varZtarget_data_formatr   r   r   Ú_BaseFusedBatchNormGradA  sh   




ø


ÿø
r¬   ZFusedBatchNormc                 G   ó   t | dg|¢R Ž S rV   ©r¬   r   r   r   r   Ú_FusedBatchNormGrad“  rY   r¯   ZFusedBatchNormV2c                 G   r­   rc   r®   r   r   r   r   Ú_FusedBatchNormV2Grad˜  rY   r°   ZFusedBatchNormV3c                 G   r­   )Nr   r®   r   r   r   r   Ú_FusedBatchNormV3Grad  rY   r±   Tc                 C   s´  |j j}|tjks|tjkrt |tj¡}t | tj¡} |rÙ|dkr)d}	g d¢}
n;|dkr4d}	g d¢}
n0|dkrNd}	g d¢}
d	t 	|¡d	d	g}t 
||¡}nd}	g d
¢}
d	t 	|¡d	d	d	g}t 
||¡}tj| |
|	d}tj||
|	d}tjt |t |¡¡|
|	d}| | }|| }tj| | |
|	d}|t || ¡ |t || ¡| |   }t || ¡tj| | |
|	d }|dksÄ|dkrÉt |¡}tj| |
d}t ||¡||fS |dkrâg d¢}
nN|dkrëg d¢}
nE|dkrg d¢}
d	t 	|¡d	d	g}t 
||¡}t 
||¡}t 
||¡}n g d
¢}
d	t 	|¡d	d	d	g}t 
||¡}t 
||¡}t 
||¡}tj| |
d}t || ¡}tj| ||  | |
d}| | | }t ||¡||fS )aS  Returns the gradients for the 3 inputs of BatchNorm.

  Args:
    grad_y: A `Tensor` of 4 or 5 dimensions for gradient for y.
    x: A `Tensor` of 4 or 5 dimensions for x.
    scale: A `Tensor` of 1 dimension for scaling.
    pop_mean: A `Tensor` of 1 dimension for the population mean. Only used when
      is_training=False.
    pop_var: A `Tensor` of 1 dimension for the population variance. Only used
      when is_training=False.
    epsilon: A small float number added to the variance of x.
    data_format: The data format for input. Either b"NHWC" or b"NCHW".
    is_training: A bool value to indicate the operation is for training
      (default) or inference.

  Returns:
    A tuple (grad_x, grad_scale, grad_offset), where grad_x is the gradient
    for x, grad_scale the gradient for scale, and grad_offset the gradient
    for offset.
  r¥   F)r   r   r   s   NDHWC)r   r   r   r’   rN   T)r   r   r’   r   )r   r   r’   r“   rD   )ry   rE   r¤   rx   )ÚdtypeZ
base_dtyper   Zfloat16Zbfloat16r   ÚcastZfloat32r   ÚsizerP   Zreduce_meanZsquared_differencer7   ZrsqrtZ
reciprocalrF   r{   )r§   re   r¢   rª   r«   r    r   r¡   Zx_dtyperE   Zreduce_axisr   Zmean_grad_yZmean_xZvar_xZgrad_y_offsetZx_offsetÚmeanÚgrad_xÚ
grad_scaleÚgrad_offsetZ	var_rsqrtr   r   r   Ú_BatchNormGrad¢  s€   

ý
ÿÿ
ÿ



ÿr¹   ZFusedBatchNormGradc              
   G   s  |   d¡}|   d¡}|   d¡}| jd }| jd }| jd }| jd }| jd }	|d }
|d }|d }t ¡ *}| |¡ | |¡ | |¡ t|||||	|||ƒ\}}}|
||g}W d	  ƒ n1 sew   Y  | |||g|||g|¡\}}}|||d	d	fS )
aô  Returns the gradients for the 3 inputs of FusedBatchNormGrad.

  Args:
    op: The FusedBatchNormGradOp for which we need to compute gradients.
    *grad: An argument list for tensors of gradients wrt the outputs with
      grad[0] as grad_grad_x, grad[1] as grad_grad_scale, grad[2] as
      grad_grad_offset.

  Returns:
    A tuple (grad_grad_y, grad_x, grad_scale, None, None), where grad_grad_y
    is the gradient for grad_y, grad_x the gradient for x, grad_scale the
    gradient for scale.
  r   r    r¡   r   r   r   r’   r“   N)r   r   r   ZGradientTapeÚwatchr¹   Zgradient)r   r   r   r    r¡   r§   re   r¢   rª   r«   Zgrad_grad_xZgrad_grad_scaleZgrad_grad_offsetZtaper¶   r·   r¸   Zgrad_initialÚgrad_grad_yr   r   r   Ú_FusedBatchNormGradGrad  s0   












ÿú
ÿr¼   ZFusedBatchNormGradV2c                 G   s   t | g|¢R Ž S ©N©r¼   r   r   r   r   Ú_FusedBatchNormGradGradV2'  s   r¿   ZFusedBatchNormGradV3c                 G   s*   t | g|¢R Ž \}}}}}|||d d d fS r½   r¾   )r   r   r»   r¶   r·   r©   r   r   r   Ú_FusedBatchNormGradGradV3,  s   rÀ   ZL2Lossc                 C   s   | j d | S )zÊReturn the gradients for L2Loss.

  Args:
    op: The L2LossOp for which we need to generate gradients.
    grad: Tensor containing a single number.

  Returns:
    The gradient, which is (x * grad).
  r   )r   r   r   r   r   Ú_L2LossGrad2  s   rÁ   ZTopKZTopKV2c           
      C   s  t  | jd ¡}t  | jd ¡}t  t |tj¡t  	|¡d ¡}t  
| jd t d|g¡¡}t  t |tj¡t  	|¡d ¡}t  |¡d }t  
|t t  t dt |tj¡| |¡d¡tj¡ dg¡}	t  
t  t  |	d¡t  
|dg¡t |¡g¡|¡t jg tjdgS )aE  Return the gradients for TopK.

  Args:
    op: The TopKOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the TopKOp.

  Returns:
    A list of two tensors, the first being the gradient w.r.t to the input and
    TopK, and the second being the gradient w.r.t. to the indices (all zero).
  r   r   rC   )r²   )r   r   r   r:   Úgatherr   r³   r   Zint64r´   rP   r   Ústackrt   rR   Zint32Ú
scatter_ndZreduce_prodZzeros)
r   r   r©   Zin_shapeZ	ind_shapeZind_lastdimZind_2dZ
in_lastdimZouterdimÚindr   r   r   Ú	_TopKGrad@  sH   þÿþþýüû

þýûrÆ   Z
ApproxTopKc                    s°   ˆj d j‰ˆdg ‰t tjˆ¡}ˆj‰ˆ d¡‰ˆdk r"ˆˆ ‰‡‡‡‡‡fdd„‰ tj	t
‡ fdd„tˆƒD ƒƒˆd}t ||ˆg¡}t ||g¡}t ||ˆjd j¡S )	zÕReturn the gradients for ApproxTopK.

  Args:
    op: The ApproxTopK for which we need to generate gradients.
    grad: The gradients for backprop.

  Returns:
    Scattered gradient based on the top-k indices.
  r   Zreduction_dimensionr   c                    s\   | ˆkrt  ˆjd ˆ¡S ˆ |  }tt dˆd ¡ƒ}||| < t  t |¡|¡}t  |ˆ¡S rc   )	r   rP   r:   ÚlistÚ	itertoolsÚrepeatr   rR   Zbroadcast_to)ÚdZiota_lenZ
iota_shapeÚiota)Ú	idx_shapeÚlifted_idx_shaper   rS   Úreduction_dimr   r   ÚGetLiftedIdxˆ  s   z)_ApproxTopKGradient.<locals>.GetLiftedIdxc                 3   s    | ]}ˆ |ƒV  qd S r½   r   )Ú.0rÊ   )rÏ   r   r   Ú	<genexpr>’  s   € z&_ApproxTopKGradient.<locals>.<genexpr>rx   )r:   r   Ú	functoolsÚreduceÚoperatorÚmulrS   r   r   rO   rÇ   rR   rP   rÄ   r   )r   r   r©   Zflat_shape_lenZ
lifted_idxZflat_idxZ	flat_gradr   )rÏ   rÌ   rÍ   r   rS   rÎ   r   Ú_ApproxTopKGradiento  s   

	ÿrÖ   Z
NthElementc                 C   sf   | j d }| jd }t t t |d¡|¡|j¡}t |d¡}t t |d¡d¡}t 	||¡| dgS )a:  Return the gradients for NthElement.

  Args:
    op: The NthElementOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the NthElementOp

  Returns:
    A list of two tensors, the first being the gradient w.r.t. the input,
    the second being the gradient w.r.t. the N (None).
  r   rC   N)
r   r:   r   r³   Úequalr   rt   r²   rF   Údivide)r   r   ÚinputÚoutputZ
indicatorsZnum_selectedr   r   r   Ú_NthElementGrad˜  s   

ÿrÛ   c              	   C   sx   g }t t | | jd ¡t ||jd ¡ƒD ]\}}tj||t |¡d d}| t t 	||¡dg¡¡ qt
j|ddS )a}  Replaces each segment with its mean along the last axis.

  Specifically, each value in the `inputs` tensor gets replaced by the mean
  value computed from the values that belong to the same segment.

  Args:
   inputs: A 2-tensor. Aggregation is done over dimension 1.
   segments: A 2-tensor, same shape as `input`.

  Returns:
    The result, same shape and type as `inputs`.
  r   r   )Znum_segmentsrC   rx   )Úzipr   Úsplitr   r   Zunsorted_segment_meanZ
reduce_maxÚappendrP   rÂ   r   rÃ   )r   ÚsegmentsÚresultZinputs_iZ
segments_iZmeans_ir   r   r   Ú_MeanAggregator³  s   þÿÿrá   ZIsotonicRegressionc                 C   s   ~| j d }t||ƒS )af  Gradient for the isotonic regression function.

  Args:
    op: The IsotonicRegression tensorflow op.
    grad_output: Tensor of incoming gradients with respect to the output.
    grad_segments: Tensor of incoming gradients with respect to the segments.

  Returns:
    A tensor, same size as `grad_output` with the gradient with respect to
    the input.
  r   )r:   rá   )r   Zgrad_outputZgrad_segmentsrß   r   r   r   Ú_IsotonicRegressionGradÎ  s   

râ   )T)NÚ__doc__rÒ   rÈ   rÔ   Ztensorflow.python.eagerr   Ztensorflow.python.frameworkr   r   Ztensorflow.python.opsr   r   r   r   r	   ZRegisterGradientr   r!   r%   r'   r.   r1   r2   r5   r8   r;   r@   rB   rH   rJ   rM   rQ   rT   rX   r\   r_   ra   rf   rl   rm   rn   ro   rp   rq   rr   rs   rv   r~   r   r€   r   r‚   r†   r‡   rˆ   rŠ   rŒ   rŽ   r   r”   r•   r˜   rš   rŸ   r¬   r¯   r°   r±   r¹   r¼   r¿   rÀ   rÁ   rÆ   rÖ   rÛ   rá   râ   r   r   r   r   Ú<module>   s  
$

 














#
















(















R



ù_
%


-
(
