import json
import logging
import mmap
import os
import shutil
import zipfile
from contextlib import contextmanager
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Generator, Iterable, Tuple, Union

from ..errors import DDUFCorruptedFileError, DDUFExportError, DDUFInvalidEntryNameError


logger = logging.getLogger(__name__)

# File extensions allowed inside a DDUF archive
DDUF_ALLOWED_ENTRIES = {".json", ".model", ".safetensors", ".txt"}

# Each folder in the archive must contain at least one of these config files
DDUF_FOLDER_REQUIRED_ENTRIES = {
    "config.json",
    "preprocessor_config.json",
    "scheduler_config.json",
    "tokenizer_config.json",
}


@dataclass
class DDUFEntry:
    """Object representing a file entry in a DDUF file.

    See [`read_dduf_file`] for how to read a DDUF file.

    Attributes:
        filename (str):
            The name of the file in the DDUF archive.
        offset (int):
            The offset of the file in the DDUF archive.
        length (int):
            The length of the file in the DDUF archive.
        dduf_path (str):
            The path to the DDUF archive (for internal use).
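
    Example (illustrative sketch; entries are normally obtained via [`read_dduf_file`],
    and the offset/length values depend on the actual archive):
        ```py
        >>> from huggingface_hub import read_dduf_file
        >>> entry = read_dduf_file("FLUX.1-dev.dduf")["model_index.json"]
        >>> entry.filename, entry.offset, entry.length
        ('model_index.json', 66, 587)
        ```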
    filenamelengthoffsetF)repr	dduf_pathreturnNc              	   c   s    | j d8}tj| dtjd}|| j| j| j  V  W d   n1 s*w   Y  W d   dS W d   dS 1 sBw   Y  dS )a-  Open the file as a memory-mapped file.

        Useful to load safetensors directly from the file.

        Example:
            ```py
            >>> import safetensors.torch
            >>> with entry.as_mmap() as mm:
            ...     tensors = safetensors.torch.load(mm)
            ```
        rbr   )r   accessN)r   openmmapfilenoACCESS_READr   r   )selffmm r(   w/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/huggingface_hub/serialization/_dduf.pyas_mmap9   s   "zDDUFEntry.as_mmaputf-8encodingc                 C   sP   | j d}|| j || jj|dW  d   S 1 s!w   Y  dS )zRead the file as text.

        Useful for '.txt' and '.json' entries.

        Example:
            ```py
            >>> import json
            >>> index = json.loads(entry.read_text())
            ```
        r   )r,   N)r   r!   seekr   readr   decode)r%   r,   r&   r(   r(   r)   	read_textJ   s   $zDDUFEntry.read_text)r+   )__name__
__module____qualname____doc__str__annotations__intr   r   r   r   r   bytesr*   r0   r(   r(   r(   r)   r   "   s   
 r   r   r   c                 C   s.  i }t | } td|   tt| dQ}| D ]D}td|j  |j	tj
kr0tdzt|j W n tyL } z	td|j |d}~ww t||}t|j||j| d||j< qW d   n1 sjw   Y  d|vrwtd	t|d  }t||  td
|  dt| d |S )a  
    Read a DDUF file and return a dictionary of entries.

    Only the metadata is read, the data is not loaded in memory.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to read.

    Returns:
        `Dict[str, DDUFEntry]`:
            A dictionary of [`DDUFEntry`] indexed by filename.

    Raises:
        - [`DDUFCorruptedFileError`]: If the DDUF file is corrupted (i.e. doesn't follow the DDUF format).

    Example:
        ```python
        >>> import json
        >>> import safetensors.torch
        >>> from huggingface_hub import read_dduf_file

        # Read DDUF metadata
        >>> dduf_entries = read_dduf_file("FLUX.1-dev.dduf")

        # Returns a mapping filename <> DDUFEntry
        >>> dduf_entries["model_index.json"]
        DDUFEntry(filename='model_index.json', offset=66, length=587)

        # Load model index as JSON
        >>> json.loads(dduf_entries["model_index.json"].read_text())
        {'_class_name': 'FluxPipeline', '_diffusers_version': '0.32.0.dev0', '_name_or_path': 'black-forest-labs/FLUX.1-dev', ...

        # Load VAE weights using safetensors
        >>> with dduf_entries["vae/diffusion_pytorch_model.safetensors"].as_mmap() as mm:
        ...     state_dict = safetensors.torch.load(mm)
        ```
    zReading DDUF file rzReading entry z)Data must not be compressed in DDUF file.z!Invalid entry name in DDUF file: N)r   r   r   r   model_index.json7Missing required 'model_index.json' entry in DDUF file.zDone reading DDUF file z. Found z entries)r   loggerinfozipfileZipFiler5   infolistdebugr   compress_type
ZIP_STOREDr   _validate_dduf_entry_namer   _get_data_offsetr   	file_sizejsonloadsr0   _validate_dduf_structurekeyslen)r   entrieszfr=   er   indexr(   r(   r)   read_dduf_fileZ   s4   '
rP   rL   c                 C   sl  t d|  d t }d}tt| dtjk}|D ]`\}}||v r*td| || |dkrQzt	
t| }W n t	jyP } ztd|d}~ww zt|}W n tyk } ztd| |d}~ww t d	| d
 t||| qW d   n1 sw   Y  |du rtdzt|| W n ty } ztd|d}~ww t d|   dS )a  Write a DDUF file from an iterable of entries.

    This is a lower-level helper than [`export_folder_as_dduf`] that allows more flexibility when serializing data.
    In particular, you don't need to save the data on disk before exporting it in the DDUF file.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to write.
        entries (`Iterable[Tuple[str, Union[str, Path, bytes]]]`):
            An iterable of entries to write in the DDUF file. Each entry is a tuple with the filename and the content.
            The filename should be the path to the file in the DDUF archive.
            The content can be a string or a pathlib.Path pointing to a file on the local disk, or the file content itself passed directly as bytes.

    Raises:
        - [`DDUFExportError`]: If anything goes wrong during the export (e.g. invalid entry name, missing 'model_index.json', etc.).

    Example:
        ```python
        # Export specific files from the local disk.
        >>> from huggingface_hub import export_entries_as_dduf
        >>> export_entries_as_dduf(
        ...     dduf_path="stable-diffusion-v1-4-FP16.dduf",
        ...     entries=[ # List entries to add to the DDUF file (here, only FP16 weights)
        ...         ("model_index.json", "path/to/model_index.json"),
        ...         ("vae/config.json", "path/to/vae/config.json"),
        ...         ("vae/diffusion_pytorch_model.fp16.safetensors", "path/to/vae/diffusion_pytorch_model.fp16.safetensors"),
        ...         ("text_encoder/config.json", "path/to/text_encoder/config.json"),
        ...         ("text_encoder/model.fp16.safetensors", "path/to/text_encoder/model.fp16.safetensors"),
        ...         # ... add more entries here
        ...     ]
        ... )
        ```

        ```python
        # Export state_dicts one by one from a loaded pipeline
        >>> from diffusers import DiffusionPipeline
        >>> from typing import Generator, Tuple
        >>> import safetensors.torch
        >>> from huggingface_hub import export_entries_as_dduf
        >>> pipe = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
        ... # ... do some work with the pipeline

        >>> def as_entries(pipe: DiffusionPipeline) -> Generator[Tuple[str, bytes], None, None]:
        ...     # Build a generator that yields the entries to add to the DDUF file.
        ...     # The first element of the tuple is the filename in the DDUF archive (must use UNIX separator!). The second element is the content of the file.
        ...     # Entries will be evaluated lazily when the DDUF file is created (only 1 entry is loaded in memory at a time)
        ...     yield "vae/config.json", pipe.vae.to_json_string().encode()
        ...     yield "vae/diffusion_pytorch_model.safetensors", safetensors.torch.save(pipe.vae.state_dict())
        ...     yield "text_encoder/config.json", pipe.text_encoder.config.to_json_string().encode()
        ...     yield "text_encoder/model.safetensors", safetensors.torch.save(pipe.text_encoder.state_dict())
        ...     # ... add more entries here

        >>> export_entries_as_dduf(dduf_path="stable-diffusion-v1-4.dduf", entries=as_entries(pipe))
        ```
    zExporting DDUF file ''NwzCan't add duplicate entry: r:   z#Failed to parse 'model_index.json'.zInvalid entry name: zAdding entry 'z' to DDUF filer;   zInvalid DDUF file structure.zDone writing DDUF file )r<   r=   setr>   r?   r5   rC   r   addrG   rH   _load_contentr/   JSONDecodeErrorrD   r   rA   _dump_content_in_archiverI   r   )r   rL   	filenamesrO   archiver   contentrN   r(   r(   r)   export_entries_as_dduf   sD   :


r[   folder_pathc                    s6   t   dtttt f  f fdd}t| |  dS )a  
    Export a folder as a DDUF file.

    Uses [`export_entries_as_dduf`] under the hood.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to write.
        folder_path (`str` or `os.PathLike`):
            The path to the folder containing the diffusion model.

    Example:
        ```python
        >>> from huggingface_hub import export_folder_as_dduf
        >>> export_folder_as_dduf(dduf_path="FLUX.1-dev.dduf", folder_path="path/to/FLUX.1-dev")
        ```
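
    Note:
        A sketch of a folder layout this function accepts (component names are
        illustrative). Files with disallowed extensions or nested more than one
        directory deep are silently skipped:

        ```text
        FLUX.1-dev/
            model_index.json
            vae/config.json
            vae/diffusion_pytorch_model.safetensors
            text_encoder/config.json
            text_encoder/model.safetensors
        ```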
    r   c                  3   s|    t  dD ]3} |  sq| jtvrtd|  d q|  }t|j	dkr4td|  d q|
 | fV  qd S )Nz**/*zSkipping file 'z' (file type not allowed)   z"' (nested directories not allowed))r   globis_filesuffixDDUF_ALLOWED_ENTRIESr<   rA   relative_torK   partsas_posix)pathpath_in_archiver\   r(   r)   _iterate_over_folder  s   

z3export_folder_as_dduf.<locals>._iterate_over_folderN)r   r	   r
   r5   r[   )r   r\   rh   r(   rg   r)   export_folder_as_dduf   s   ri   rY   r   rZ   c              	   C   s   | j |dddL}t|ttfr1t|}| d}t||d W d    n1 s+w   Y  nt|tr<|| ntd| dW d    d S W d    d S 1 sWw   Y  d S )NrR   T)force_zip64r   i   zInvalid content type for z. Must be str, Path or bytes.)	r!   
isinstancer5   r   shutilcopyfileobjr8   writer   )rY   r   rZ   
archive_fhcontent_path
content_fhr(   r(   r)   rW     s   
"rW   c                 C   s<   t | ttfrt|  S t | tr| S tdt|  d)zoLoad the content of an entry as bytes.

    Used only for small checks (not to dump content into archive).
    """
    if isinstance(content, (str, Path)):
        return Path(content).read_bytes()
    elif isinstance(content, bytes):
        return content
    else:
        raise DDUFExportError(f"Invalid content type. Must be str, Path or bytes. Got {type(content)}.")


def _validate_dduf_entry_name(entry_name: str) -> str:
    if "." + entry_name.split(".")[-1] not in DDUF_ALLOWED_ENTRIES:
        raise DDUFInvalidEntryNameError(f"File type not allowed: {entry_name}")
    if "\\" in entry_name:
        raise DDUFInvalidEntryNameError(f"Entry names must use UNIX separators ('/'). Got {entry_name}.")
    entry_name = entry_name.strip("/")
    if entry_name.count("/") > 1:
        raise DDUFInvalidEntryNameError(f"DDUF only supports 1 level of directory. Got {entry_name}.")
    return entry_name


def _validate_dduf_structure(index: Any, entry_names: Iterable[str]) -> None:
    """
    Consistency checks on the DDUF file structure.

    Rules:
    - The 'model_index.json' entry is required and must contain a dictionary.
    - Each folder name must correspond to an entry in 'model_index.json'.
    - Each folder must contain at least a config file ('config.json', 'tokenizer_config.json', 'preprocessor_config.json', 'scheduler_config.json').

    Args:
        index (Any):
            The content of the 'model_index.json' entry.
        entry_names (Iterable[str]):
            The list of entry names in the DDUF file.

    Raises:
        - [`DDUFCorruptedFileError`]: If the DDUF file is corrupted (i.e. doesn't follow the DDUF format).
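
    Example:
        ```py
        # Minimal sketch of a structure that passes validation. The index values
        # mimic the (library, class) pairs of a diffusers 'model_index.json' and
        # are illustrative only.
        >>> _validate_dduf_structure(
        ...     {"vae": ["diffusers", "AutoencoderKL"], "text_encoder": ["transformers", "CLIPTextModel"]},
        ...     ["model_index.json", "vae/config.json", "text_encoder/config.json"],
        ... )
        ```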
    """
    if not isinstance(index, dict):
        raise DDUFCorruptedFileError(f"Invalid 'model_index.json' content. Must be a dictionary. Got {type(index)}.")

    dduf_folders = {entry.split("/")[0] for entry in entry_names if "/" in entry}
    for folder in dduf_folders:
        if folder not in index:
            raise DDUFCorruptedFileError(f"Missing required entry '{folder}' in 'model_index.json'.")
        if not any(f"{folder}/{required_entry}" in entry_names for required_entry in DDUF_FOLDER_REQUIRED_ENTRIES):
            raise DDUFCorruptedFileError(
                f"Missing required file in folder '{folder}'. Must contain at least one of {DDUF_FOLDER_REQUIRED_ENTRIES}."
            )
rI   rM   r=   c                 C   s   | j du r	td|j}| j | | j d}t|dk r"tdt|dd d}t|dd d}|d | | }|S )a1  
    Calculate the data offset for a file in a ZIP archive.

    Args:
        zf (`zipfile.ZipFile`):
            The opened ZIP file. Must be opened in read mode.
        info (`zipfile.ZipInfo`):
            The file info.

    Returns:
        int: The offset of the file data in the ZIP archive.
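
    Note:
        A sketch of the fixed 30-byte ZIP local file header this parser relies on
        (per the ZIP spec; only the fields used here are shown):

        ```text
        bytes 0-3    signature 'PK\x03\x04'
        bytes 26-27  file name length (n), little-endian
        bytes 28-29  extra field length (m), little-endian
        file data    starts at header_offset + 30 + n + m
        ```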
    """
    if zf.fp is None:
        raise DDUFCorruptedFileError("ZipFile object must be opened in read mode.")

    # Step 1: Get the offset of the local file header.
    header_offset = info.header_offset

    # Step 2: Read the fixed-size part of the local file header (30 bytes).
    zf.fp.seek(header_offset)
    local_file_header = zf.fp.read(30)
    if len(local_file_header) < 30:
        raise DDUFCorruptedFileError("Incomplete local file header.")

    # Step 3: Parse the variable-length field sizes (file name + extra field).
    filename_len = int.from_bytes(local_file_header[26:28], "little")
    extra_field_len = int.from_bytes(local_file_header[28:30], "little")

    # The file data starts right after the fixed header, the file name and the extra field.
    data_offset = header_offset + 30 + filename_len + extra_field_len

    return data_offset