o
    h                     @   s   d Z ddlmZmZ ddlmZmZmZmZm	Z	m
Z
mZ ddlmZ e
dZeegef Zeege	e f ZdZdd	d
ddZeeZeG dd dZdd eddeeef dedededeeef defddZdedefddZdS )z.Contains helpers to split tensors into shards.    )	dataclassfield)AnyCallableDictListOptionalTypeVarUnion   )loggingTensorT5GBl    J)i ʚ;i@B i  )TBGBMBKBc                   @   s\   e Zd ZU eddZeed< eee	f ed< eee
e f ed< eeef ed< dd Zd	S )
StateDictSplitF)init
is_shardedmetadatafilename_to_tensorstensor_to_filenamec                 C   s   t | jdk| _d S )N   )lenr   r   )self r   w/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/huggingface_hub/serialization/_base.py__post_init__-   s   zStateDictSplit.__post_init__N)__name__
__module____qualname__r   r   bool__annotations__r   strr   r   r   r   r   r   r   r   &   s   
 r   c                 C   s   d S )Nr   )tensorr   r   r   <lambda>6   s    r&   )get_storage_idmax_shard_size
state_dictget_storage_sizefilename_patternr'   r(   returnc                   s  i }g }i }d}d}	t |trt|}|  D ]X\}
}t |tr'td|
 q||}|dur@||v r;|| |
 q|
g||< ||}||krT|	|7 }	||
|i q|| |krc|| i }d}|||
< ||7 }|	|7 }	qt|dkr{|| t|}| D ]\}}|D ]}|d |v r|D ]}
| |
 ||
< q nqq|dkr|jdd t	d|	i t
|  i fdd	|  D d
S i }i }t|D ]%\}}|jd|d dd|dd |D ]}
 ||
< qt
| | < qt	d|	i||d
S )a9  
    Split a model state dictionary in shards so that each shard is smaller than a given size.

    The shards are determined by iterating through the `state_dict` in the order of its keys. There is no optimization
    made to make each shard as close as possible to the maximum size passed. For example, if the limit is 10GB and we
    have tensors of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB] they will get sharded as [6GB], [6+2GB], [6+2+2GB] and not
    [6+2+2GB], [6+2GB], [6GB].

    <Tip warning={true}>

    If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
    size greater than `max_shard_size`.

    </Tip>

    Args:
        state_dict (`Dict[str, Tensor]`):
            The state dictionary to save.
        get_storage_size (`Callable[[Tensor], int]`):
            A function that returns the size of a tensor when saved on disk in bytes.
        get_storage_id (`Callable[[Tensor], Optional[Any]]`, *optional*):
            A function that returns a unique identifier to a tensor storage. Multiple different tensors can share the
            same underlying storage. This identifier is guaranteed to be unique and constant for this tensor's storage
            during its lifetime. Two tensor storages with non-overlapping lifetimes may have the same id.
        filename_pattern (`str`, *optional*):
            The pattern to generate the files names in which the model will be saved. Pattern must be a string that
            can be formatted with `filename_pattern.format(suffix=...)` and must contain the keyword `suffix`
        max_shard_size (`int` or `str`, *optional*):
            The maximum size of each shard, in bytes. Defaults to 5GB.

    Returns:
        [`StateDictSplit`]: A `StateDictSplit` object containing the shards and the index to retrieve them.
    r   z8Skipping tensor %s as it is a string (bnb serialization)Nr    )suffix
total_sizec                    s   i | ]}| qS r   r   ).0keyfilenamer   r   
<dictcomp>   s    z8split_state_dict_into_shards_factory.<locals>.<dictcomp>)r   r   r   -05dz-of-)
isinstancer$   parse_size_to_intitemsloggerinfoappendr   formatr   listkeys	enumerate)r)   r*   r+   r'   r(   storage_id_to_tensors
shard_listcurrent_shardcurrent_shard_sizer/   r1   r%   
storage_idtensor_size	nb_shardsr?   shardtensor_name_to_filenamer   idxr   r2   r   $split_state_dict_into_shards_factory1   sv   )





 
rK   size_as_strc              
   C   s   |   } | dd  }|tvrtd| d|  dt| }zt| dd   }W n tyB } ztd|  d| |d}~ww t|| S )aM  
    Parse a size expressed as a string with digits and unit (like `"5MB"`) to an integer (in bytes).

    Supported units are "TB", "GB", "MB", "KB".

    Args:
        size_as_str (`str`): The size to convert. Will be directly returned if an `int`.

    Example:

    ```py
    >>> parse_size_to_int("5MB")
    5000000
    ```
    NzUnit 'z:' not supported. Supported units are TB, GB, MB, KB. Got 'z'.z%Could not parse the size value from 'z': )stripupper
SIZE_UNITS
ValueErrorfloatint)rL   unit
multipliervalueer   r   r   r8      s   r8   N)__doc__dataclassesr   r   typingr   r   r   r   r   r	   r
   r-   r   r   rS   TensorSizeFn_TStorageIDFn_TMAX_SHARD_SIZErP   
get_logger__file__r:   r   r$   rK   r8   r   r   r   r   <module>   sB   $



 