import json
import logging
import mmap
import os
import shutil
import zipfile
from contextlib import contextmanager
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Generator, Iterable, Tuple, Union

from ..errors import DDUFCorruptedFileError, DDUFExportError, DDUFInvalidEntryNameError


logger = logging.getLogger(__name__)

# File extensions allowed inside a DDUF archive
DDUF_ALLOWED_ENTRIES = {".json", ".model", ".safetensors", ".txt"}

# Each folder in the archive must contain at least one of these config files
DDUF_FOLDER_REQUIRED_ENTRIES = {
    "config.json",
    "preprocessor_config.json",
    "scheduler_config.json",
    "tokenizer_config.json",
}


@dataclass
class DDUFEntry:
    """Object representing a file entry in a DDUF file.

    See [`read_dduf_file`] for how to read a DDUF file.

    Attributes:
        filename (str):
            The name of the file in the DDUF archive.
        offset (int):
            The offset of the file in the DDUF archive.
        length (int):
            The length of the file in the DDUF archive.
        dduf_path (str):
            The path to the DDUF archive (for internal use).
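
    Example (illustrative sketch; entries are normally obtained via [`read_dduf_file`],
    and the offset/length values depend on the actual archive):
        ```py
        >>> from huggingface_hub import read_dduf_file
        >>> entry = read_dduf_file("FLUX.1-dev.dduf")["model_index.json"]
        >>> entry.filename, entry.offset, entry.length
        ('model_index.json', 66, 587)
        ```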
    filenamelengthoffsetF)repr	dduf_pathreturnNc              	   c   s    | j d8}tj| dtjd}|| j| j| j  V  W d   n1 s*w   Y  W d   dS W d   dS 1 sBw   Y  dS )a-  Open the file as a memory-mapped file.

        Useful to load safetensors directly from the file.

        Example:
            ```py
            >>> import safetensors.torch
            >>> with entry.as_mmap() as mm:
            ...     tensors = safetensors.torch.load(mm)
            ```
        rbr   )r   accessN)r   openmmapfilenoACCESS_READr   r   )selffmm r(   w/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/huggingface_hub/serialization/_dduf.pyas_mmap9   s   "zDDUFEntry.as_mmaputf-8encodingc                 C   sP   | j d}|| j || jj|dW  d   S 1 s!w   Y  dS )zRead the file as text.

        Useful for '.txt' and '.json' entries.

        Example:
            ```py
            >>> import json
            >>> index = json.loads(entry.read_text())
            ```
        r   )r,   N)r   r!   seekr   readr   decode)r%   r,   r&   r(   r(   r)   	read_textJ   s   $zDDUFEntry.read_text)r+   )__name__
__module____qualname____doc__str__annotations__intr   r   r   r   r   bytesr*   r0   r(   r(   r(   r)   r   "   s   
 r   r   r   c                 C   s.  i }t | } td|   tt| dQ}| D ]D}td|j  |j	tj
kr0tdzt|j W n tyL } z	td|j |d}~ww t||}t|j||j| d||j< qW d   n1 sjw   Y  d|vrwtd	t|d  }t||  td
|  dt| d |S )a  
    Read a DDUF file and return a dictionary of entries.

    Only the metadata is read, the data is not loaded in memory.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to read.

    Returns:
        `Dict[str, DDUFEntry]`:
            A dictionary of [`DDUFEntry`] indexed by filename.

    Raises:
        - [`DDUFCorruptedFileError`]: If the DDUF file is corrupted (i.e. doesn't follow the DDUF format).

    Example:
        ```python
        >>> import json
        >>> import safetensors.torch
        >>> from huggingface_hub import read_dduf_file

        # Read DDUF metadata
        >>> dduf_entries = read_dduf_file("FLUX.1-dev.dduf")

        # Returns a mapping filename <> DDUFEntry
        >>> dduf_entries["model_index.json"]
        DDUFEntry(filename='model_index.json', offset=66, length=587)

        # Load model index as JSON
        >>> json.loads(dduf_entries["model_index.json"].read_text())
        {'_class_name': 'FluxPipeline', '_diffusers_version': '0.32.0.dev0', '_name_or_path': 'black-forest-labs/FLUX.1-dev', ...

        # Load VAE weights using safetensors
        >>> with dduf_entries["vae/diffusion_pytorch_model.safetensors"].as_mmap() as mm:
        ...     state_dict = safetensors.torch.load(mm)
        ```
    zReading DDUF file rzReading entry z)Data must not be compressed in DDUF file.z!Invalid entry name in DDUF file: N)r   r   r   r   model_index.json7Missing required 'model_index.json' entry in DDUF file.zDone reading DDUF file z. Found z entries)r   loggerinfozipfileZipFiler5   infolistdebugr   compress_type
ZIP_STOREDr   _validate_dduf_entry_namer   _get_data_offsetr   	file_sizejsonloadsr0   _validate_dduf_structurekeyslen)r   entrieszfr=   er   indexr(   r(   r)   read_dduf_fileZ   s4   '
rP   rL   c                 C   sl  t d|  d t }d}tt| dtjk}|D ]`\}}||v r*td| || |dkrQzt	
t| }W n t	jyP } ztd|d}~ww zt|}W n tyk } ztd| |d}~ww t d	| d
 t||| qW d   n1 sw   Y  |du rtdzt|| W n ty } ztd|d}~ww t d|   dS )a  Write a DDUF file from an iterable of entries.

    This is a lower-level helper than [`export_folder_as_dduf`] that allows more flexibility when serializing data.
    In particular, you don't need to save the data on disk before exporting it in the DDUF file.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to write.
        entries (`Iterable[Tuple[str, Union[str, Path, bytes]]]`):
            An iterable of entries to write in the DDUF file. Each entry is a tuple with the filename and the content.
            The filename should be the path to the file in the DDUF archive.
            The content can be a string or a pathlib.Path pointing to a file on the local disk, or the file content itself passed directly as bytes.

    Raises:
        - [`DDUFExportError`]: If anything goes wrong during the export (e.g. invalid entry name, missing 'model_index.json', etc.).

    Example:
        ```python
        # Export specific files from the local disk.
        >>> from huggingface_hub import export_entries_as_dduf
        >>> export_entries_as_dduf(
        ...     dduf_path="stable-diffusion-v1-4-FP16.dduf",
        ...     entries=[ # List entries to add to the DDUF file (here, only FP16 weights)
        ...         ("model_index.json", "path/to/model_index.json"),
        ...         ("vae/config.json", "path/to/vae/config.json"),
        ...         ("vae/diffusion_pytorch_model.fp16.safetensors", "path/to/vae/diffusion_pytorch_model.fp16.safetensors"),
        ...         ("text_encoder/config.json", "path/to/text_encoder/config.json"),
        ...         ("text_encoder/model.fp16.safetensors", "path/to/text_encoder/model.fp16.safetensors"),
        ...         # ... add more entries here
        ...     ]
        ... )
        ```

        ```python
        # Export state_dicts one by one from a loaded pipeline
        >>> from diffusers import DiffusionPipeline
        >>> from typing import Generator, Tuple
        >>> import safetensors.torch
        >>> from huggingface_hub import export_entries_as_dduf
        >>> pipe = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
        ... # ... do some work with the pipeline

        >>> def as_entries(pipe: DiffusionPipeline) -> Generator[Tuple[str, bytes], None, None]:
        ...     # Build a generator that yields the entries to add to the DDUF file.
        ...     # The first element of the tuple is the filename in the DDUF archive (must use UNIX separator!). The second element is the content of the file.
        ...     # Entries will be evaluated lazily when the DDUF file is created (only 1 entry is loaded in memory at a time)
        ...     yield "vae/config.json", pipe.vae.to_json_string().encode()
        ...     yield "vae/diffusion_pytorch_model.safetensors", safetensors.torch.save(pipe.vae.state_dict())
        ...     yield "text_encoder/config.json", pipe.text_encoder.config.to_json_string().encode()
        ...     yield "text_encoder/model.safetensors", safetensors.torch.save(pipe.text_encoder.state_dict())
        ...     # ... add more entries here

        >>> export_entries_as_dduf(dduf_path="stable-diffusion-v1-4.dduf", entries=as_entries(pipe))
        ```
    zExporting DDUF file ''NwzCan't add duplicate entry: r:   z#Failed to parse 'model_index.json'.zInvalid entry name: zAdding entry 'z' to DDUF filer;   zInvalid DDUF file structure.zDone writing DDUF file )r<   r=   setr>   r?   r5   rC   r   addrG   rH   _load_contentr/   JSONDecodeErrorrD   r   rA   _dump_content_in_archiverI   r   )r   rL   	filenamesrO   archiver   contentrN   r(   r(   r)   export_entries_as_dduf   sD   :


r[   folder_pathc                    s6   t   dtttt f  f fdd}t| |  dS )a  
    Export a folder as a DDUF file.

    Uses [`export_entries_as_dduf`] under the hood.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to write.
        folder_path (`str` or `os.PathLike`):
            The path to the folder containing the diffusion model.

    Example:
        ```python
        >>> from huggingface_hub import export_folder_as_dduf
        >>> export_folder_as_dduf(dduf_path="FLUX.1-dev.dduf", folder_path="path/to/FLUX.1-dev")
        ```
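
    Note:
        A sketch of a folder layout this function accepts (component names are
        illustrative). Files with disallowed extensions or nested more than one
        directory deep are silently skipped:

        ```text
        FLUX.1-dev/
            model_index.json
            vae/config.json
            vae/diffusion_pytorch_model.safetensors
            text_encoder/config.json
            text_encoder/model.safetensors
        ```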
    r   c                  3   s|    t  dD ]3} |  sq| jtvrtd|  d q|  }t|j	dkr4td|  d q|
 | fV  qd S )Nz**/*zSkipping file 'z' (file type not allowed)   z"' (nested directories not allowed))r   globis_filesuffixDDUF_ALLOWED_ENTRIESr<   rA   relative_torK   partsas_posix)pathpath_in_archiver\   r(   r)   _iterate_over_folder  s   

z3export_folder_as_dduf.<locals>._iterate_over_folderN)r   r	   r
   r5   r[   )r   r\   rh   r(   rg   r)   export_folder_as_dduf   s   ri   rY   r   rZ   c              	   C   s   | j |dddL}t|ttfr1t|}| d}t||d W d    n1 s+w   Y  nt|tr<|| ntd| dW d    d S W d    d S 1 sWw   Y  d S )NrR   T)force_zip64r   i   zInvalid content type for z. Must be str, Path or bytes.)	r!   
isinstancer5   r   shutilcopyfileobjr8   writer   )rY   r   rZ   
archive_fhcontent_path
content_fhr(   r(   r)   rW     s   
"rW   c                 C   s<   t | ttfrt|  S t | tr| S tdt|  d)zoLoad the content of an entry as bytes.

    Used only for small checks (not to dump content into archive).
    """
    if isinstance(content, (str, Path)):
        return Path(content).read_bytes()
    elif isinstance(content, bytes):
        return content
    else:
        raise DDUFExportError(f"Invalid content type. Must be str, Path or bytes. Got {type(content)}.")


def _validate_dduf_entry_name(entry_name: str) -> str:
    if "." + entry_name.split(".")[-1] not in DDUF_ALLOWED_ENTRIES:
        raise DDUFInvalidEntryNameError(f"File type not allowed: {entry_name}")
    if "\\" in entry_name:
        raise DDUFInvalidEntryNameError(f"Entry names must use UNIX separators ('/'). Got {entry_name}.")
    entry_name = entry_name.strip("/")
    if entry_name.count("/") > 1:
        raise DDUFInvalidEntryNameError(f"DDUF only supports 1 level of directory. Got {entry_name}.")
    return entry_name


def _validate_dduf_structure(index: Any, entry_names: Iterable[str]) -> None:
    """
    Consistency checks on the DDUF file structure.

    Rules:
    - The 'model_index.json' entry is required and must contain a dictionary.
    - Each folder name must correspond to an entry in 'model_index.json'.
    - Each folder must contain at least a config file ('config.json', 'tokenizer_config.json', 'preprocessor_config.json', 'scheduler_config.json').

    Args:
        index (Any):
            The content of the 'model_index.json' entry.
        entry_names (Iterable[str]):
            The list of entry names in the DDUF file.

    Raises:
        - [`DDUFCorruptedFileError`]: If the DDUF file is corrupted (i.e. doesn't follow the DDUF format).
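
    Example:
        ```py
        # Minimal sketch of a structure that passes validation. The index values
        # mimic the (library, class) pairs of a diffusers 'model_index.json' and
        # are illustrative only.
        >>> _validate_dduf_structure(
        ...     {"vae": ["diffusers", "AutoencoderKL"], "text_encoder": ["transformers", "CLIPTextModel"]},
        ...     ["model_index.json", "vae/config.json", "text_encoder/config.json"],
        ... )
        ```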
    """
    if not isinstance(index, dict):
        raise DDUFCorruptedFileError(f"Invalid 'model_index.json' content. Must be a dictionary. Got {type(index)}.")

    dduf_folders = {entry.split("/")[0] for entry in entry_names if "/" in entry}
    for folder in dduf_folders:
        if folder not in index:
            raise DDUFCorruptedFileError(f"Missing required entry '{folder}' in 'model_index.json'.")
        if not any(f"{folder}/{required_entry}" in entry_names for required_entry in DDUF_FOLDER_REQUIRED_ENTRIES):
            raise DDUFCorruptedFileError(
                f"Missing required file in folder '{folder}'. Must contain at least one of {DDUF_FOLDER_REQUIRED_ENTRIES}."
            )
rI   rM   r=   c                 C   s   | j du r	td|j}| j | | j d}t|dk r"tdt|dd d}t|dd d}|d | | }|S )a1  
    Calculate the data offset for a file in a ZIP archive.

    Args:
        zf (`zipfile.ZipFile`):
            The opened ZIP file. Must be opened in read mode.
        info (`zipfile.ZipInfo`):
            The file info.

    Returns:
        int: The offset of the file data in the ZIP archive.
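
    Note:
        A sketch of the fixed 30-byte ZIP local file header this parser relies on
        (per the ZIP spec; only the fields used here are shown):

        ```text
        bytes 0-3    signature 'PK\x03\x04'
        bytes 26-27  file name length (n), little-endian
        bytes 28-29  extra field length (m), little-endian
        file data    starts at header_offset + 30 + n + m
        ```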
    """
    if zf.fp is None:
        raise DDUFCorruptedFileError("ZipFile object must be opened in read mode.")

    # Step 1: Get the offset of the local file header.
    header_offset = info.header_offset

    # Step 2: Read the fixed-size part of the local file header (30 bytes).
    zf.fp.seek(header_offset)
    local_file_header = zf.fp.read(30)
    if len(local_file_header) < 30:
        raise DDUFCorruptedFileError("Incomplete local file header.")

    # Step 3: Parse the variable-length field sizes (file name + extra field).
    filename_len = int.from_bytes(local_file_header[26:28], "little")
    extra_field_len = int.from_bytes(local_file_header[28:30], "little")

    # The file data starts right after the fixed header, the file name and the extra field.
    data_offset = header_offset + 30 + filename_len + extra_field_len

    return data_offset