o
    hv                     @  s   d dl mZ d dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
mZmZmZmZmZ d dlZer8d dlmZ ddgZG d	d
 d
ZG dd deZdddZG dd dZdS )    )annotationsN)abcdefaultdict)Enum)AnycastOptionaloverloadTYPE_CHECKINGUnion)IterableOptState
GradScalerc                   @  s$   e Zd ZdZdddZdd
dZdS )_MultiDeviceReplicatorz^Lazily serves copies of a tensor to requested devices.

    Copies are cached per-device.
    master_tensortorch.TensorreturnNonec                 C  s   || _ i | _d S N)master_per_device_tensors)selfr    r   i/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/torch/amp/grad_scaler.py__init__   s   
z_MultiDeviceReplicator.__init__devicetorch.devicec                 C  s6   | j |d }|d u r| jj|ddd}|| j |< |S )NT)r   non_blockingcopy)r   getr   to)r   r   retvalr   r   r   r      s
   
z_MultiDeviceReplicator.getN)r   r   r   r   )r   r   r   r   )__name__
__module____qualname____doc__r   r   r   r   r   r   r      s    
r   c                   @  s   e Zd ZdZdZdZdS )r   r         N)r"   r#   r$   READYUNSCALEDSTEPPEDr   r   r   r   r   +   s    r   dict[str, Any]c                   C  s   t ji dS )N)stagefound_inf_per_device)r   r(   r   r   r   r   _refresh_per_optimizer_state1   s   r.   c                   @  sR  e Zd ZdZ						dhdiddZdjddZdkddZedld d!Zedmd#d!Zednd%d!Zedod'd!Zdpd)d!Zdqd0d1Z	drd2d3Z
dsd:d;Zdtd<d=ZdudvdAdBZdwdDdEZdxdFdGZdxdHdIZdydKdLZdxdMdNZdydOdPZdzdQdRZd{dTdUZdzdVdWZd|dXdYZd}dZd[Zd~d]d^Zd}d_d`ZddbdcZddddeZddfdgZd>S )r   aX  An instance ``scaler`` of :class:`GradScaler`.

    Helps perform the steps of gradient scaling
    conveniently.

    * ``scaler.scale(loss)`` multiplies a given loss by ``scaler``'s current scale factor.
    * ``scaler.step(optimizer)`` safely unscales gradients and calls ``optimizer.step()``.
    * ``scaler.update()`` updates ``scaler``'s scale factor.

    Example::

        # Creates a GradScaler once at the beginning of training.
        scaler = GradScaler()

        for epoch in epochs:
            for input, target in data:
                optimizer.zero_grad()
                output = model(input)
                loss = loss_fn(output, target)

                # Scales loss.  Calls backward() on scaled loss to create scaled gradients.
                scaler.scale(loss).backward()

                # scaler.step() first unscales gradients of the optimizer's params.
                # If gradients don't contain infs/NaNs, optimizer.step() is then called,
                # otherwise, optimizer.step() is skipped.
                scaler.step(optimizer)

                # Updates the scale for next iteration.
                scaler.update()

    See the :ref:`Automatic Mixed Precision examples<amp-examples>` for usage
    (along with autocasting) in more complex cases like gradient clipping, gradient accumulation, gradient penalty,
    and multiple losses/optimizers.

    ``scaler`` dynamically estimates the scale factor each iteration.  To minimize gradient underflow,
    a large scale factor should be used.  However, ``float16`` values can "overflow" (become inf or NaN) if
    the scale factor is too large.  Therefore, the optimal scale factor is the largest factor that can be used
    without incurring inf or NaN gradient values.
    ``scaler`` approximates the optimal scale factor over time by checking the gradients for infs and NaNs during every
    ``scaler.step(optimizer)`` (or optional separate ``scaler.unscale_(optimizer)``, see :meth:`unscale_`).

    * If infs/NaNs are found, ``scaler.step(optimizer)`` skips the underlying ``optimizer.step()`` (so the params
      themselves remain uncorrupted) and ``update()`` multiplies the scale by ``backoff_factor``.

    * If no infs/NaNs are found, ``scaler.step(optimizer)`` runs the underlying ``optimizer.step()`` as usual.
      If ``growth_interval`` unskipped iterations occur consecutively, ``update()`` multiplies the scale by
      ``growth_factor``.

    The scale factor often causes infs/NaNs to appear in gradients for the first few iterations as its
    value calibrates.  ``scaler.step`` will skip the underlying ``optimizer.step()`` for these
    iterations.  After that, step skipping should occur rarely (once every few hundred or thousand iterations).

    Args:
        device (str, optional, default="cuda"): Device type to use. Possible values are: 'cuda' and 'cpu'.
            The type is the same as the `type` attribute of a :class:`torch.device`.
            Thus, you may obtain the device type of a tensor using `Tensor.device.type`.
        init_scale (float, optional, default=2.**16):  Initial scale factor.
        growth_factor (float, optional, default=2.0):  Factor by which the scale is multiplied during
            :meth:`update` if no inf/NaN gradients occur for ``growth_interval`` consecutive iterations.
        backoff_factor (float, optional, default=0.5):  Factor by which the scale is multiplied during
            :meth:`update` if inf/NaN gradients occur in an iteration.
        growth_interval (int, optional, default=2000):  Number of consecutive iterations without inf/NaN gradients
            that must occur for the scale to be multiplied by ``growth_factor``.
        enabled (bool, optional):  If ``False``, disables gradient scaling. :meth:`step` simply
            invokes the underlying ``optimizer.step()``, and other methods become no-ops.
            Default: ``True``
    cuda      @       @      ?  Tr   str
init_scalefloatgrowth_factorbackoff_factorgrowth_intervalintenabledboolr   r   c                 C  s   || _ || _| j dkr|rtjjj rtd d| _| jrK|dks'J d|dk s/J d|| _	d | _
|| _|| _|| _d| _d | _tt| _d S d S )Nr/   zLtorch.cuda.amp.GradScaler is enabled, but CUDA is not available.  Disabling.F      ?z The growth factor must be > 1.0.z!The backoff factor must be < 1.0.r   )_device_enabledtorchr/   ampcommonamp_definitely_not_availablewarningswarn_init_scale_scale_growth_factor_backoff_factor_growth_interval_init_growth_tracker_growth_trackerr   r.   _per_optimizer_states)r   r   r5   r7   r8   r9   r;   r   r   r   r   {   s,   	

zGradScaler.__init__funcname!tuple[torch.Tensor, torch.Tensor]c                 C  sL   d}| j d usJ d| d| | jd us J d| d| | j | jfS )NzaThis may indicate your script did not use scaler.scale(loss or outputs) earlier in the iteration.z
Attempted z but _scale is None.  z but _growth_tracker is None.  )rG   rL   )r   rN   fixr   r   r   _check_scale_growth_tracker   s   z&GradScaler._check_scale_growth_trackerdevr   c                 C  sF   | j d u s	J dtjd| jtj|d| _tjd| jtj|d| _ d S )Nz)_growth_tracker initialized before _scaler   dtyper   )rL   r@   fullrF   float32rG   rK   int32)r   rR   r   r   r   _lazy_init_scale_growth_tracker   s
   z*GradScaler._lazy_init_scale_growth_trackeroutputsr   c                 C     d S r   r   r   rY   r   r   r   scale      zGradScaler.scalelist[torch.Tensor]c                 C  rZ   r   r   r[   r   r   r   r\      r]   tuple[torch.Tensor, ...]c                 C  rZ   r   r   r[   r   r   r   r\      r]   Iterable[torch.Tensor]c                 C  rZ   r   r   r[   r   r   r   r\      r]   +Union[torch.Tensor, Iterable[torch.Tensor]]c                   sn   j s|S t|tjr(jdu r|j jdusJ |jj|jdd S g d fdd  |S )	a2  
        Multiplies ('scales') a tensor or list of tensors by the scale factor.

        Returns scaled outputs.  If this instance of :class:`GradScaler` is not enabled, outputs are returned
        unmodified.

        Args:
            outputs (Tensor or iterable of Tensors):  Outputs to scale.
        NTr   r   valra   c                   s   t | tjr0tdkr&jd u r| j jd usJ tj | d 	| j S t | t
jrJt | }t | ttfrHt| |S |S td)Nr   z2outputs must be a Tensor or an iterable of Tensors)
isinstancer@   TensorlenrG   rX   r   appendr   r   r   r   maplisttupletype
ValueError)rc   iterableapply_scaler   stashr   r   ro      s   

z%GradScaler.scale.<locals>.apply_scale)rc   ra   )r?   rd   r@   re   rG   rX   r   r    r[   r   rn   r   r\      s   
	optimizertorch.optim.Optimizer	inv_scale	found_inf
allow_fp16 dict[torch.device, torch.Tensor]c              
   C  s"  t |}t |}tdd }t t |jD ]I}|d D ]B}	t|	tjs&J |	jd u r,q|s9|	jjtj	kr9t
d|	jjrP|	jjtj	u rJ|	j |	_|	j }
n|	j}
||
j |
j |
 qq| D ]\}}| D ]}t||||| qlqdW d    |jS 1 sw   Y  |jS )Nc                   S  s   t tS r   )r   ri   r   r   r   r   <lambda>   s    z,GradScaler._unscale_grads_.<locals>.<lambda>paramsz%Attempting to unscale FP16 gradients.)r   r   r@   no_gradparam_groupsrd   re   gradrT   float16rl   	is_sparsecoalesce_valuesr   rg   itemsvalues*_amp_foreach_non_finite_check_and_unscale_r   r   )r   rq   rs   rt   ru   per_device_inv_scaleper_device_found_infper_device_and_dtype_gradsgroupparam
to_unscaler   per_dtype_gradsgradsr   r   r   _unscale_grads_   sF   






  zGradScaler._unscale_grads_c                 C  s   | j sdS | d | jt| }|d tju rtd|d tju r'td| jdus.J | j	 
  }tjddtj| jjd}| |||d	|d
< tj|d< dS )as  
        Divides ("unscales") the optimizer's gradient tensors by the scale factor.

        :meth:`unscale_` is optional, serving cases where you need to
        :ref:`modify or inspect gradients<working-with-unscaled-gradients>`
        between the backward pass(es) and :meth:`step`.
        If :meth:`unscale_` is not called explicitly,  gradients will be unscaled  automatically during :meth:`step`.

        Simple example, using :meth:`unscale_` to enable clipping of unscaled gradients::

            ...
            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
            scaler.step(optimizer)
            scaler.update()

        Args:
            optimizer (torch.optim.Optimizer):  Optimizer that owns the gradients to be unscaled.

        .. note::
            :meth:`unscale_` does not incur a CPU-GPU sync.

        .. warning::
            :meth:`unscale_` should only be called once per optimizer per :meth:`step` call,
            and only after all gradients for that optimizer's assigned parameters have been accumulated.
            Calling :meth:`unscale_` twice for a given optimizer between each :meth:`step` triggers a RuntimeError.

        .. warning::
            :meth:`unscale_` may unscale sparse gradients out of place, replacing the ``.grad`` attribute.
        Nunscale_r,   zMunscale_() has already been called on this optimizer since the last update().z(unscale_() is being called after step().r           rS   Fr-   )r?   rQ   rM   idr   r)   RuntimeErrorr*   rG   double
reciprocalr6   r@   rU   rV   r   r   )r   rq   optimizer_staters   rt   r   r   r   r   #  s"    
zGradScaler.unscale_r   r+   argsr   kwargsOptional[float]c                 O  s2   d }t dd |d  D s|j|i |}|S )Nc                 s  s    | ]}|  V  qd S r   )item).0vr   r   r   	<genexpr>c  s    z-GradScaler._maybe_opt_step.<locals>.<genexpr>r-   )sumr   step)r   rq   r   r   r   r!   r   r   r   _maybe_opt_step[  s   zGradScaler._maybe_opt_stepc           	        s  | j s|j|i |S d|v rtd| d | jt| }|d tju r*tdd}t|ddr|}d	t	
|jjv }|rMtd
t |d	| i nB|d tju rY| | |    duscJ ttjt fdd|d  D }|d tjkrt|ddn t|dd |_||_|j|i |}tj|d< |s|`|`|S |d tju r| | t|d dksJ d| j||g|R i |}tj|d< |S )a  Invoke ``unscale_(optimizer)`` followed by parameter update, if gradients are not infs/NaN.

        :meth:`step` carries out the following two operations:

        1.  Internally invokes ``unscale_(optimizer)`` (unless :meth:`unscale_` was explicitly called for ``optimizer``
            earlier in the iteration).  As part of the :meth:`unscale_`, gradients are checked for infs/NaNs.
        2.  If no inf/NaN gradients are found, invokes ``optimizer.step()`` using the unscaled
            gradients.  Otherwise, ``optimizer.step()`` is skipped to avoid corrupting the params.

        ``*args`` and ``**kwargs`` are forwarded to ``optimizer.step()``.

        Returns the return value of ``optimizer.step(*args, **kwargs)``.

        Args:
            optimizer (torch.optim.Optimizer):  Optimizer that applies the gradients.
            args:  Any arguments.
            kwargs:  Any keyword arguments.

        .. warning::
            Closure use is not currently supported.
        closurez@Closure use is not currently supported if GradScaler is enabled.r   r,   z7step() has already been called since the last update().N_step_supports_amp_scalingFgrad_scalerzGradScaler is going to stop passing itself as a keyword argument to the passed optimizer. In the near future GradScaler registers `grad_scale: Tensor` and `found_inf: Tensor` to the passed optimizer and let the optimizer use them directly.c                   s   g | ]
}|j  jd dqS )T)r   )r    r   )r   tscalerr   r   
<listcomp>  s    z#GradScaler.step.<locals>.<listcomp>r-   
grad_scaler&   r   z/No inf checks were recorded for this optimizer.)r?   r   r   rQ   rM   r   r   r*   getattrinspect	signature
parametersrD   rE   FutureWarningupdater(   _check_inf_per_device_get_scale_asyncr   r@   re   r   r   r)   r   rt   r   rf   r   )	r   rq   r   r   r   r!   kwargs_has_grad_scaler_kwargrt   r   r   r   r   g  sl   






zGradScaler.stepN	new_scale$Optional[Union[float, torch.Tensor]]c                   s  | j sdS | d\ }|durJ| jdusJ t|tr#| j| ndd}|jj| jks0J ||	 dks:J ||j
du sCJ || j| n= fdd| j D }t|dks`J d	|d }t|dkrztdt|D ]}||| 7 }qqt ||| j| j| j tt| _dS )
a?  Update the scale factor.

        If any optimizer steps were skipped the scale is multiplied by ``backoff_factor``
        to reduce it. If ``growth_interval`` unskipped iterations occurred consecutively,
        the scale is multiplied by ``growth_factor`` to increase it.

        Passing ``new_scale`` sets the new scale value manually. (``new_scale`` is not
        used directly, it's used to fill GradScaler's internal scale tensor. So if
        ``new_scale`` was a tensor, later in-place changes to that tensor will not further
        affect the scale GradScaler uses internally.)

        Args:
            new_scale (float or :class:`torch.Tensor`, optional, default=None):  New scale factor.

        .. warning::
            :meth:`update` should only be called at the end of the iteration, after ``scaler.step(optimizer)`` has
            been invoked for all optimizers used this iteration.

        .. warning::
            For performance reasons, we do not check the scale factor value to avoid synchronizations,
            so the scale factor is not guaranteed to be above 1. If the scale falls below 1 and/or
            you are seeing NaNs in your gradients or loss, something is likely wrong. For example,
            bf16-pretrained models are often incompatible with AMP/fp16 due to differing dynamic ranges.
        Nr   znew_scale should be a float or a 1-element torch.cuda.FloatTensor or                     torch.FloatTensor with requires_grad=False.r&   Fc                   s.   g | ]}|d    D ]
}|j jddq
qS )r-   Trb   )r   r    r   )r   statert   rG   r   r   r      s    
z%GradScaler.update.<locals>.<listcomp>r   z,No inf checks were recorded prior to update.)r?   rQ   rG   rd   r6   fill_r   rk   r>   numelrequires_gradcopy_rM   r   rf   ranger@   _amp_update_scale_rH   rI   rJ   r   r.   )r   r   rL   reason
found_infsfound_inf_combinedir   r   r   r     s:   


zGradScaler.updateOptional[torch.Tensor]c                 C     | j S r   r   r   r   r   r   r     s   zGradScaler._get_scale_asyncc                 C  s.   | j r|   }du r| jS tt| S dS )zReturn a Python float containing the current scale, or 1.0 if scaling is disabled.

        .. warning::
            :meth:`get_scale` incurs a CPU-GPU sync.
        Nr=   )r?   r   rF   r   r6   r   )r   r\   r   r   r   	get_scale  s   zGradScaler.get_scalec                 C  r   )z9Return a Python float containing the scale growth factor.rH   r   r   r   r   get_growth_factor*     zGradScaler.get_growth_factor
new_factorc                 C  
   || _ dS )zSet a new scale growth factor.

        Args:
            new_scale (float):  Value to use as the new scale growth factor.
        Nr   r   r   r   r   r   set_growth_factor.     
zGradScaler.set_growth_factorc                 C  r   )z:Return a Python float containing the scale backoff factor.rI   r   r   r   r   get_backoff_factor6  r   zGradScaler.get_backoff_factorc                 C  r   )zSet a new scale backoff factor.

        Args:
            new_scale (float):  Value to use as the new scale backoff factor.
        Nr   r   r   r   r   set_backoff_factor:  r   zGradScaler.set_backoff_factorc                 C  r   )z3Return a Python int containing the growth interval.rJ   r   r   r   r   get_growth_intervalB  r   zGradScaler.get_growth_intervalnew_intervalc                 C  r   )z|Set a new growth interval.

        Args:
            new_interval (int):  Value to use as the new growth interval.
        Nr   )r   r   r   r   r   set_growth_intervalF  r   zGradScaler.set_growth_intervalc                 C  s*   | j r| jd u r| jS tt| j S dS )Nr   )r?   rL   rK   r   r:   r   r   r   r   r   _get_growth_trackerN  s   
zGradScaler._get_growth_trackerc                 C  r   )z:Return a bool indicating whether this instance is enabled.)r?   r   r   r   r   
is_enabledW  r   zGradScaler.is_enabledc                 C  s(   | j r|  | j| j| j|  dS i S )a  Return the state of the scaler as a :class:`dict`.

        It contains five entries:

        * ``"scale"`` - a Python float containing the current scale
        * ``"growth_factor"`` - a Python float containing the current growth factor
        * ``"backoff_factor"`` - a Python float containing the current backoff factor
        * ``"growth_interval"`` - a Python int containing the current growth interval
        * ``"_growth_tracker"`` - a Python int containing the number of recent consecutive unskipped steps.

        If this instance is not enabled, returns an empty dict.

        .. note::
           If you wish to checkpoint the scaler's state after a particular iteration, :meth:`state_dict`
           should be called after :meth:`update`.
        )r\   r7   r8   r9   rL   )r?   r   rH   rI   rJ   r   r   r   r   r   
state_dict[  s   zGradScaler.state_dictr   c                 C  s   | j sdS t|dkrtdtt|d | _| jdur$| j|d  tt|d | _tt|d | _	tt
|d | _tt
|d | _| jdurS| j|d  dS dS )	zLoad the scaler state.

        If this instance is disabled, :meth:`load_state_dict` is a no-op.

        Args:
           state_dict(dict): scaler state.  Should be an object returned from a call to :meth:`state_dict`.
        Nr   zeThe source state dict is empty, possibly because it was saved from a disabled instance of GradScaler.r\   r7   r8   r9   rL   )r?   rf   r   r   r6   rF   rG   r   rH   rI   r:   rJ   rK   rL   )r   r   r   r   r   load_state_dictv  s    

zGradScaler.load_state_dictc                 C  sR   | j  }| jr't| jdksJ d|  |d< |  |d< d |d< d |d< |S )Nr   zpA GradScaler instance may only be pickled at the beginning of an iteration, or at the end after scaler.update().rF   rK   rG   rL   )__dict__r   r?   rf   rM   r   r   r   r   r   r   r   __getstate__  s   
zGradScaler.__getstate__r   c                 C  s   | j | d S r   )r   r   r   r   r   r   __setstate__  s   zGradScaler.__setstate__c                 C  sj   |  d\}}tjddtj|jd}tjddtj|jd}| |||d| jt| d< | jt| d S )Nr   r   r=   rS   r   Tr-   )rQ   r@   rU   rV   r   r   rM   r   )r   rq   rG   _dummy_inv_scalert   r   r   r   r     s   z GradScaler._check_inf_per_devicec                 C  s   | j t| d S )Nr-   )rM   r   )r   rq   r   r   r   _found_inf_per_device  s   z GradScaler._found_inf_per_device)r/   r0   r1   r2   r3   T)r   r4   r5   r6   r7   r6   r8   r6   r9   r:   r;   r<   r   r   )rN   r4   r   rO   )rR   r   r   r   )rY   r   r   r   )rY   r^   r   r^   )rY   r_   r   r_   )rY   r`   r   r`   )rY   ra   r   ra   )
rq   rr   rs   r   rt   r   ru   r<   r   rv   )rq   rr   r   r   )
rq   rr   r   r+   r   r   r   r   r   r   )rq   rr   r   r   r   r   r   r   r   )r   r   r   r   )r   r   )r   r6   )r   r6   r   r   )r   r:   )r   r:   r   r   )r   r<   r   r+   )r   r+   r   r   )r   r+   r   r   )rq   rr   r   r+   )r"   r#   r$   r%   r   rQ   rX   r	   r\   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   5   sR    G
#


-
5
8
l
F








	




r   )
__future__r   r   rD   collectionsr   r   enumr   typingr   r   r   r	   r
   r   r@   collections.abcr   __all__r   r   r.   r   r   r   r   r   <module>   s    
