o
    ?e ;                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZ ddl
mZ dd	l
mZ dd
lmZ G dd deZ	dddZdddZdd Zdd ZejfddZdejdfddZdd Zdd ZdS ) z$Utilities related to loss functions.    )distribute_lib)ops)tensor_conversion)backend)keras_tensor)	array_opscond)math_ops)ragged_tensorc                   @   s8   e Zd ZdZdZdZdZdZedd Z	edd	 Z
d
S )ReductionV2ai  Types of loss reduction.

  Contains the following values:

  * `AUTO`: Indicates that the reduction option will be determined by the usage
     context. For almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
     used with `tf.distribute.Strategy`, outside of built-in training loops such
     as `tf.keras` `compile` and `fit`, we expect reduction value to be
     `SUM` or `NONE`. Using `AUTO` in that case will raise an error.
  * `NONE`: No **additional** reduction is applied to the output of the wrapped
     loss function. When non-scalar losses are returned to Keras functions like
     `fit`/`evaluate`, the unreduced vector loss is passed to the optimizer
     but the reported loss will be a scalar value.

     Caution: **Verify the shape of the outputs when using** `Reduction.NONE`.
     The builtin loss functions wrapped by the loss classes reduce
     one dimension (`axis=-1`, or `axis` if specified by loss function).
     `Reduction.NONE` just means that no **additional** reduction is applied by
     the class wrapper. For categorical losses with an example input shape of
     `[batch, W, H, n_classes]` the `n_classes` dimension is reduced. For
     pointwise losses your must include a dummy axis so that `[batch, W, H, 1]`
     is reduced to `[batch, W, H]`. Without the dummy axis `[batch, W, H]`
     will be incorrectly reduced to `[batch, W]`.

  * `SUM`: Scalar sum of weighted losses.
  * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses.
     This reduction type is not supported when used with
     `tf.distribute.Strategy` outside of built-in training loops like `tf.keras`
     `compile`/`fit`.

     You can implement 'SUM_OVER_BATCH_SIZE' using global batch size like:
     ```
     with strategy.scope():
       loss_obj = tf.keras.losses.CategoricalCrossentropy(
           reduction=tf.keras.losses.Reduction.NONE)
       ....
       loss = tf.reduce_sum(loss_obj(labels, predictions)) *
           (1. / global_batch_size)
     ```

  Please see the [custom training guide](
  https://www.tensorflow.org/tutorials/distribute/custom_training) for more
  details on this.
  autononesumZsum_over_batch_sizec                 C   s   | j | j| j| jfS N)AUTONONESUMSUM_OVER_BATCH_SIZE)cls r   k/home/www/facesmatcher.com/pyenv/lib/python3.10/site-packages/tensorflow/python/keras/utils/losses_utils.pyallP   s   zReductionV2.allc                 C   s   ||   vrtd| d S )NzInvalid Reduction Key %s.)r   
ValueError)r   keyr   r   r   validateT   s   zReductionV2.validateN)__name__
__module____qualname____doc__r   r   r   r   classmethodr   r   r   r   r   r   r      s    -
r   Nc           	         s  t |pd ttjstt tjst  j}|j} j}|j}|durl|durl|| }||d krL|j	d 
drLtdgn||d kra|j	d 
drat dg  fW  d   S tt  }|du s|j	d 
drtt|d |fddfdd|du s|j	d 
drtt|d | fdd fd	d  fW  d   S 1 sw   Y  dS )
a$  Squeeze last dim if ranks differ from expected by exactly 1.

  In the common case where we expect shapes to match, `expected_rank_diff`
  defaults to 0, and we squeeze the last dimension of the larger rank if they
  differ by 1.

  But, for example, if `labels` contains class IDs and `predictions` contains 1
  probability per class, we expect `predictions` to have 1 more dimension than
  `labels`, so `expected_rank_diff` would be 1. In this case, we'd squeeze
  `labels` if `rank(predictions) - rank(labels) == 0`, and
  `predictions` if `rank(predictions) - rank(labels) == 2`.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    labels: Label values, a `Tensor` whose dimensions match `predictions`.
    predictions: Predicted values, a `Tensor` of arbitrary dimensions.
    expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.
    name: Name of the op.

  Returns:
    Tuple of `labels` and `predictions`, possibly with last dim squeezed.
  remove_squeezable_dimensionsN   c                         t  dgS Nr#   r   squeezer   predictionsr   r   <lambda>       z.remove_squeezable_dimensions.<locals>.<lambda>c                          S r   r   r   r(   r   r   r*          c                      r$   r%   r&   r   labelsr   r   r*      r+   c                      r,   r   r   r   r.   r   r   r*      r-   )r   
name_scope
isinstancer   RaggedTensorr   "convert_to_tensor_v2_with_dispatchshapendimsdimsZis_compatible_withr   r'   rankr	   r
   equal)	r/   r)   Zexpected_rank_diffnameZpredictions_shapeZpredictions_rankZlabels_shapeZlabels_rank	rank_diffr   )r/   r)   r   r!   Z   sT   




$r!   c                    s  j }|j}dur^j }|j}|dur,|dur,|| dks$|d dkr+t\n2tt fddtdt d fdd}ttd|\du rffS j }|j}	|	dkrufS |dur|	dur|	| dkrtdgn||	 dkrt	dgfS t}
|
t fddfd	d
  fdd}tt|
dfdd|fS )a  Squeeze or expand last dimension if needed.

  1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1
  (using `remove_squeezable_dimensions`).
  2. Squeezes or expands last dim of `sample_weight` if its rank differs by 1
  from the new rank of `y_pred`.
  If `sample_weight` is scalar, it is kept scalar.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
    y_true: Optional label `Tensor` whose dimensions match `y_pred`.
    sample_weight: Optional weight scalar or `Tensor` whose dimensions match
      `y_pred`.

  Returns:
    Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly has
    the last dimension squeezed,
    `sample_weight` could be extended by one dimension.
    If `sample_weight` is None, (y_pred, y_true) is returned.
  Nr"   r#   c                      s
   t  S r   )r!   r   y_predy_truer   r   r*      s    z.squeeze_or_expand_dimensions.<locals>.<lambda>c                      s   t   fddS )Nc                      s    fS r   r   r   r;   r   r   r*      s    z@squeeze_or_expand_dimensions.<locals>.<lambda>.<locals>.<lambda>r   r   )is_last_dim_1squeeze_dimsr<   r=   r   r   r*      s    r   c                      r$   r%   r&   r   sample_weightr   r   r*      r+   c                     s*   fdd} t  t d| fddS )Nc                      r$   r%   )r   expand_dimsr   r@   r   r   r*      r+   zMsqueeze_or_expand_dimensions.<locals>._maybe_expand_weights.<locals>.<lambda>r#   c                      r,   r   r   r   r@   r   r   r*      r-   r	   r
   r8   )Zexpand_weights)r:   rA   r   r   _maybe_expand_weights   s   z;squeeze_or_expand_dimensions.<locals>._maybe_expand_weightsc                      s   t  td S )Nr"   rC   r   )rD   maybe_squeeze_weightsr:   r   r   _maybe_adjust_weights   s   z;squeeze_or_expand_dimensions.<locals>._maybe_adjust_weightsc                      r,   r   r   r   r@   r   r   r*      r-   )
r4   r5   r!   r   r7   r
   r8   r	   r'   rB   )r<   r=   rA   Zy_pred_shapeZy_pred_rankZy_true_shapeZy_true_rankZmaybe_squeeze_dimsZweights_shapeZweights_rankZweights_rank_tensorrF   r   )rD   r>   rE   r:   rA   r?   r<   r=   r   squeeze_or_expand_dimensions   sP   



rG   c                 C   s   t | }t j||ddS )a,  Computes a safe mean of the losses.

  Args:
    losses: `Tensor` whose elements contain individual loss measurements.
    num_present: The number of measurable elements in `losses`.

  Returns:
    A scalar representing the mean of `losses`. If `num_present` is zero,
      then zero is returned.
  valuer9   )r
   
reduce_sumZ
div_no_nan)lossesZnum_presentZ
total_lossr   r   r   
_safe_mean   s   
rL   c                 C   sH   t d}tjtj| |d| jdW  d   S 1 sw   Y  dS )z3Computes the number of elements in `losses` tensor.Znum_elementsrI   )dtypeN)r   r0   r
   castr   sizerM   )rK   scoper   r   r   _num_elements  s   $rQ   c                 C   s8   |t jkr	| }|S t| }|t jkrt|t| }|S )z2Reduces the individual weighted loss measurements.)r   r   r
   rJ   r   rL   rQ   )weighted_losses	reductionlossr   r   r   reduce_weighted_loss	  s   


rU   c                 C   s   t | |t jkrt j}|du rd}t|pdQ |t _t	| t
jtjfs.t| } | j}t	|t
js<t|}t| d} t|d}t| d|\} }}t| |}t||}t||}|W  d   S 1 snw   Y  dS )a  Computes the weighted loss.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as
      `losses`, or be broadcastable to `losses`.
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op.

  Raises:
    ValueError: If the shape of `sample_weight` is not compatible with `losses`.

  Returns:
    Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
    `NONE`, this has the same shape as `losses`; otherwise, it is scalar.
  N      ?Zweighted_lossfloat32)r   r   r   r   r   r0   r   Zget_default_graphZ_last_loss_reductionr1   r   ZKerasTensorr   r2   r   r3   rM   r
   rN   rG   multiplyrU   )rK   rA   rS   r9   Zinput_dtype_rR   rT   r   r   r   compute_weighted_loss  s4   






$rZ   c                 C   s"   t  j}|dkr| d| 9 } | S )zBScales and returns the given loss value by the number of replicas.r"   rV   )r   Zget_strategyZnum_replicas_in_sync)Z
loss_valueZnum_replicasr   r   r   scale_loss_for_distributionQ  s
   r[   c                    st   d | D ](}|j jr$ du s|j j jkr|j  n|j  hddhkr$d |j jr,|   S q r8 fdd| D } | S )ab  Cast a list of losses to a common dtype.

  If any loss is floating-point, they will all be casted to the most-precise
  floating-point loss. Otherwise the losses are not casted. We also skip casting
  losses if there are any complex losses.

  Args:
    losses: A list of losses.

  Returns:
    `losses`, but they have been casted to a common dtype.
  NZbfloat16Zfloat16rW   c                    s   g | ]}t | qS r   )r
   rN   ).0rT   Zhighest_floatr   r   
<listcomp>q  s    z/cast_losses_to_common_dtype.<locals>.<listcomp>)rM   Zis_floatingrO   Z
is_complex)rK   rT   r   r]   r   cast_losses_to_common_dtypeZ  s   r_   )r   N)NN)r   Ztensorflow.python.distributer   Ztensorflow.python.frameworkr   r   Ztensorflow.python.kerasr   Ztensorflow.python.keras.enginer   Ztensorflow.python.opsr   r	   r
   Ztensorflow.python.ops.raggedr   objectr   r!   rG   rL   rQ   r   rU   rZ   r[   r_   r   r   r   r   <module>   s0   >

AY

<	