o
    ®Ÿ?e:$  ã                   @   s’   d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddl	mZ dd	l	mZ dd
l	mZ ddl	mZ G dd„ dejƒZdS )zNadam optimizer implementation.é    )Úops)Útensor_conversion)Úbackend_config)Úlearning_rate_schedule)Úoptimizer_v2)Ú	array_ops)Úcontrol_flow_ops)Úmath_ops)Ú	state_ops)Ú	variablesc                       sl   e Zd ZdZdZ					 d‡ fdd„	Zd	d
„ Zdd„ Z‡ fdd„Zddd„Z	ddd„Z
‡ fdd„Z‡  ZS )ÚNadama–  Optimizer that implements the NAdam algorithm.
  Much like Adam is essentially RMSprop with momentum, Nadam is Adam with
  Nesterov momentum.

  Args:
    learning_rate: A Tensor or a floating point value.  The learning rate.
    beta_1: A float value or a constant float tensor. The exponential decay
      rate for the 1st moment estimates.
    beta_2: A float value or a constant float tensor. The exponential decay
      rate for the exponentially weighted infinity norm.
    epsilon: A small constant for numerical stability.
    name: Optional name for the operations created when applying gradients.
      Defaults to `"Nadam"`.
    **kwargs: Keyword arguments. Allowed to be one of
      `"clipnorm"` or `"clipvalue"`.
      `"clipnorm"` (float) clips gradients by norm; `"clipvalue"` (float) clips
      gradients by value.

  Usage Example:
    >>> opt = tf.keras.optimizers.Nadam(learning_rate=0.2)
    >>> var1 = tf.Variable(10.0)
    >>> loss = lambda: (var1 ** 2) / 2.0
    >>> step_count = opt.minimize(loss, [var1]).numpy()
    >>> "{:.1f}".format(var1.numpy())
    9.8

  Reference:
    - [Dozat, 2015](http://cs229.stanford.edu/proj2015/054_report.pdf).
  Tçü©ñÒMbP?çÍÌÌÌÌÌì?ç+‡ÙÎ÷ï?çH¯¼šò×z>c                    sš   |  dd¡|d< | d|¡}t|tjƒrtdƒ‚tt| ƒj|fi |¤Ž |  	d| d|¡¡ |  	d| j
¡ |  	d|¡ |  	d|¡ |pFt ¡ | _d | _d S )	NZschedule_decaygü©ñÒMbp?ÚdecayÚlrzdThe Nadam optimizer does not support tf.keras.optimizers.LearningRateSchedules as the learning rate.Úlearning_rateÚbeta_1Úbeta_2)ÚpopÚgetÚ
isinstancer   ZLearningRateScheduleÚ
ValueErrorÚsuperr   Ú__init__Z
_set_hyperÚ_initial_decayr   ÚepsilonÚ_m_cache)Úselfr   r   r   r   ÚnameÚkwargs©Ú	__class__© úk/home/www/facesmatcher.com/pyenv/lib/python3.10/site-packages/tensorflow/python/keras/optimizer_v2/nadam.pyr   ?   s   
zNadam.__init__c                 C   sp   |d j j}| jd u r | jdg |ddtjjd| _| j | j¡ |D ]}|  	|d¡ q"|D ]}|  	|d¡ q-d S )Nr   Zmomentum_cacheZonesF)ÚshapeÚdtypeZinitializerZ	trainableZaggregationÚmÚv)
r'   Ú
base_dtyper   Z
add_weightÚtf_variablesZVariableAggregationZONLY_FIRST_REPLICAZ_weightsÚappendZadd_slot)r   Úvar_listÚ	var_dtypeÚvarr$   r$   r%   Ú_create_slotsV   s    
úþzNadam._create_slotsc                 C   s<  t  |  d|¡¡}t  |  d|¡¡}t  |  d|¡¡}t | jd |¡}t | jd |¡}t d|¡}	|ddt |	| j| ¡   }
|ddt |	| j| ¡   }t | j|¡|
 }|| j	j
u rmt  tj| j	|| jd	¡}|| }t|| t | j|¡|||
|d| d| d|
 d| d| dt ||¡ d
|||f< d S )Nr   r   r   é   é   g¸…ëQ¸î?g      ð?g      à?©Zuse_locking)Úlr_tÚneg_lr_tr   Úbeta_1_tÚbeta_2_tÚm_tÚm_t_1Úone_minus_beta_1_tÚone_minus_beta_2_tÚone_minus_m_tÚone_minus_m_schedule_newÚone_minus_m_schedule_nextÚv_t_prime_denominator)r   ÚidentityZ
_get_hyperr	   ÚcastZ
iterationsÚpowr   Ú_m_cache_readr   r'   r
   ÚassignÚ_use_lockingÚdictr   Z"convert_to_tensor_v2_with_dispatchr   )r   Ú
var_devicer.   Úapply_stater4   r6   r7   Z
local_stepZ	next_stepZ
decay_baser8   r9   Zm_schedule_newZm_schedule_nextr$   r$   r%   Ú_prepare_locali   sF   ÿÿ
ÿÿñzNadam._prepare_localc                    s   t  | j¡| _tt| ƒ |¡S ©N)r   r@   r   rC   r   r   Ú_prepare)r   r-   r"   r$   r%   rK      s   zNadam._prepareNc                 C   s  |j |jj}}|pi  ||f¡p|  ||¡}|  |d¡}|  |d¡}||d  }	|d | |d |  }
tj||
| jd}
|
|d  }|d | |d	 t	 
|¡  }tj||| jd}||d
  }|d |	 |d |  }||d | t	 |¡|d    }tj||| jdjS )Nr(   r)   r=   r6   r:   r3   r>   r7   r;   r?   r<   r9   r4   r   )Údevicer'   r*   r   Ú_fallback_apply_stateÚget_slotr
   rD   rE   r	   ZsquareÚsqrtÚop)r   Úgradr/   rH   rG   r.   Úcoefficientsr(   r)   Úg_primer8   Ú	m_t_primeÚv_tÚ	v_t_primeÚm_t_barZvar_tr$   r$   r%   Ú_resource_apply_dense”   s0   
ÿ

ÿ
ÿ

ÿÿzNadam._resource_apply_densec                 C   sš  |j |jj}}|pi  ||f¡p|  ||¡}|  |d¡}|  |d¡}	||d  }
||d  }tj|||d  | jd}t	 
|g¡ |  |||¡}t ||¡}W d   ƒ n1 sZw   Y  ||d  }|d |
 |d	 |  }|| |d
  }tj|	|	|d  | jd}t	 
|g¡ |  |	||¡}t ||¡}W d   ƒ n1 s¤w   Y  ||d  }t |¡|d  }|  |||d | | ¡}tj|||gŽ S )Nr(   r)   r=   r:   r6   r3   r>   r<   r9   r;   r7   r?   r   r5   )rL   r'   r*   r   rM   rN   r
   rD   rE   r   Zcontrol_dependenciesZ_resource_scatter_addr   Úgatherr	   rO   r   Úgroup)r   rQ   r/   ÚindicesrH   rG   r.   rR   r(   r)   rS   Zm_scaled_g_valuesr8   Z	m_t_slicerT   rW   Zv_scaled_g_valuesrU   Z	v_t_slicerV   Zv_prime_sqrt_plus_epsZ
var_updater$   r$   r%   Ú_resource_apply_sparse«   sD   
ÿÿþ

ÿÿþþzNadam._resource_apply_sparsec                    s>   t t| ƒ ¡ }| |  d¡| j|  d¡|  d¡| jdœ¡ |S )Nr   r   r   )r   r   r   r   r   )r   r   Ú
get_configÚupdateZ_serialize_hyperparameterr   r   )r   Úconfigr"   r$   r%   r]   Ó   s   ûzNadam.get_config)r   r   r   r   r   rJ   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__Z_HAS_AGGREGATE_GRADr   r0   rI   rK   rX   r\   r]   Ú__classcell__r$   r$   r"   r%   r      s    û&

(r   N)rc   Ztensorflow.python.frameworkr   r   Ztensorflow.python.kerasr   Z$tensorflow.python.keras.optimizer_v2r   r   Ztensorflow.python.opsr   r   r	   r
   r   r+   ZOptimizerV2r   r$   r$   r$   r%   Ú<module>   s   