o
    hy                     @   s|  d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
mZmZ ddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZmZmZmZ dd	lmZ d
dl m!Z! d
dl"m#Z#m$Z$m%Z%m&Z& d
dl'm(Z( d
dl)m*Z*m+Z+m,Z, d
dl-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7 d
dl-m8Z9 d
dl:m;Z; erd
dl<m=Z= e4>e?Z@ed ZAdZBdZCeG dd dZDeG dd dZEeG dd dZFdeGdeGfddZHeeFeEeDf ZId eeI ddfd!d"ZJe7dd#dd$d%eeF d&eGd'eGd(eeGeGf d)eeG d*eKd+eeG fd,d-ZLe7dddd.d%eeF d&eGd'eGd(eeGeGf d)eeG d+eeG d/eeM fd0d1ZNd2eOfd3d4ZPe7		5	dCd%eeF d&eGd'eGd(eeGeGf d+eGd)eeG d/eMd6eeG ddfd7d8ZQe7	dDd9eeE d&eGd'eGd(eeGeGf d+eGd)eeG deeeGeeG f ed:eRf f fd;d<ZS		dEd eeI d=eeeGeeG f ed:eRf f d>eGd?eeG d@eeG deeeGef  fdAdBZTdS )Fz<
Type definitions and utilities for the `create_commit` API
    N)defaultdict)contextmanager)	dataclassfield)groupby)PathPurePosixPath)TYPE_CHECKINGAnyBinaryIODictIterableIteratorListLiteralOptionalTupleUnion)
thread_map   )	constants)EntryNotFoundErrorHfHubHTTPErrorXetAuthorizationErrorXetRefreshTokenError)
hf_hub_url)
UploadInfo
lfs_uploadpost_lfs_batch_info)
FORBIDDEN_FOLDERSXetTokenTypechunk_iterable(fetch_xet_connection_info_from_repo_infoget_sessionhf_raise_for_statusloggingshatqdm_stream_filevalidate_hf_hub_args)tqdm)_get_progress_bar_context)RepoFilelfsregulari     c                   @   s:   e Zd ZU dZeed< dZeee	d f ed< dd Z
dS )CommitOperationDeletea  
    Data structure holding necessary info to delete a file or a folder from a repository
    on the Hub.

    Args:
        path_in_repo (`str`):
            Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
            for a file or `"checkpoints/1fec34a/"` for a folder.
        is_folder (`bool` or `Literal["auto"]`, *optional*)
            Whether the Delete Operation applies to a folder or not. If "auto", the path
            type (file or folder) is guessed automatically by looking if path ends with
            a "/" (folder) or not (file). To explicitly set the path type, you can set
            `is_folder=True` or `is_folder=False`.
    path_in_repoauto	is_folderc                 C   sF   t | j| _| jdkr| jd| _t| jts!td| j dd S )Nr2   /zNWrong value for `is_folder`. Must be one of [`True`, `False`, `'auto'`]. Got 'z'.)_validate_path_in_repor1   r3   endswith
isinstancebool
ValueErrorself r<   o/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/huggingface_hub/_commit_api.py__post_init__M   s   
z#CommitOperationDelete.__post_init__N)__name__
__module____qualname____doc__str__annotations__r3   r   r8   r   r>   r<   r<   r<   r=   r0   9   s
   
 r0   c                   @   sZ   e Zd ZU dZeed< eed< dZee ed< dZee ed< dZ	ee ed< dd	 Z
dS )
CommitOperationCopyab  
    Data structure holding necessary info to copy a file in a repository on the Hub.

    Limitations:
      - Only LFS files can be copied. To copy a regular file, you need to download it locally and re-upload it
      - Cross-repository copies are not supported.

    Note: you can combine a [`CommitOperationCopy`] and a [`CommitOperationDelete`] to rename an LFS file on the Hub.

    Args:
        src_path_in_repo (`str`):
            Relative filepath in the repo of the file to be copied, e.g. `"checkpoints/1fec34a/weights.bin"`.
        path_in_repo (`str`):
            Relative filepath in the repo where to copy the file, e.g. `"checkpoints/1fec34a/weights_copy.bin"`.
        src_revision (`str`, *optional*):
            The git revision of the file to be copied. Can be any valid git revision.
            Default to the target commit revision.
    src_path_in_repor1   Nsrc_revision_src_oid	_dest_oidc                 C   s   t | j| _t | j| _d S N)r5   rF   r1   r:   r<   r<   r=   r>   w   s   z!CommitOperationCopy.__post_init__)r?   r@   rA   rB   rC   rD   rG   r   rH   rI   r>   r<   r<   r<   r=   rE   X   s   
 rE   c                   @   s  e Zd ZU dZeed< eeeee	f ed< e
dddZeed< e
ddddZee ed	< e
ddddZee ed
< e
ddddZee ed< e
ddddZeed< e
ddddZeed< dddZeddedee	 fddZdefddZedee fddZdS )CommitOperationAdda  
    Data structure holding necessary info to upload a file to a repository on the Hub.

    Args:
        path_in_repo (`str`):
            Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
        path_or_fileobj (`str`, `Path`, `bytes`, or `BinaryIO`):
            Either:
            - a path to a local file (as `str` or `pathlib.Path`) to upload
            - a buffer of bytes (`bytes`) holding the content of the file to upload
            - a "file object" (subclass of `io.BufferedIOBase`), typically obtained
                with `open(path, "rb")`. It must support `seek()` and `tell()` methods.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is not one of `str`, `Path`, `bytes` or `io.BufferedIOBase`.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is a `str` or `Path` but not a path to an existing file.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is a `io.BufferedIOBase` but it doesn't support both
            `seek()` and `tell()`.
    r1   path_or_fileobjF)initreprupload_infoN)rM   rN   default_upload_mode_should_ignore_remote_oid_is_uploaded_is_committedreturnc              
   C   s   t | j| _t| jtrt| j| _t| jtr2tjtj	| j}tj
|s1td| dnt| jtjtfs?tdt| jtjriz| j  | jdtj W n ttfyh } ztd|d}~ww t| jtrxt| j| _dS t| jtrt| j| _dS t| j| _dS )z6Validates `path_or_fileobj` and compute `upload_info`.zProvided path: 'z(' is not a file on the local file systemzpath_or_fileobj must be either an instance of str, bytes or io.BufferedIOBase. If you passed a file-like object, make sure it is in binary mode.r   zNpath_or_fileobj is a file-like object but does not implement seek() and tell()N)r5   r1   r7   rL   r   rC   ospathnormpath
expanduserisfiler9   ioBufferedIOBasebytestellseekSEEK_CUROSErrorAttributeErrorr   	from_pathrO   
from_bytesfrom_fileobj)r;   rL   excr<   r<   r=   r>      s:   
z CommitOperationAdd.__post_init__	with_tqdmc                 c   s    t | jtst | jtrF|r*t| j}|V  W d   dS 1 s#w   Y  dS t| jd}|V  W d   dS 1 s?w   Y  dS t | jtrUt| jV  dS t | jtj	ro| j
 }| jV  | j|tj dS dS )u  
        A context manager that yields a file-like object allowing to read the underlying
        data behind `path_or_fileobj`.

        Args:
            with_tqdm (`bool`, *optional*, defaults to `False`):
                If True, iterating over the file object will display a progress bar. Only
                works if the file-like object is a path to a file. Pure bytes and buffers
                are not supported.

        Example:

        ```python
        >>> operation = CommitOperationAdd(
        ...        path_in_repo="remote/dir/weights.h5",
        ...        path_or_fileobj="./local/weights.h5",
        ... )
        CommitOperationAdd(path_in_repo='remote/dir/weights.h5', path_or_fileobj='./local/weights.h5')

        >>> with operation.as_file() as file:
        ...     content = file.read()

        >>> with operation.as_file(with_tqdm=True) as file:
        ...     while True:
        ...         data = file.read(1024)
        ...         if not data:
        ...              break
        config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]

        >>> with operation.as_file(with_tqdm=True) as file:
        ...     requests.put(..., data=file)
        config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
        ```
        Nrb)r7   rL   rC   r   r'   openr^   r\   BytesIOr]   r_   r`   SEEK_SET)r;   rh   fileprev_posr<   r<   r=   as_file   s    $""
zCommitOperationAdd.as_filec                 C   s:   |   }t| W  d   S 1 sw   Y  dS )z[
        The base64-encoded content of `path_or_fileobj`

        Returns: `bytes`
        N)ro   base64	b64encodereadr;   rm   r<   r<   r=   
b64content  s   
$zCommitOperationAdd.b64contentc                 C   s^   | j du rdS | j dkr| jj S |  }t| W  d   S 1 s(w   Y  dS )a  Return the OID of the local file.

        This OID is then compared to `self._remote_oid` to check if the file has changed compared to the remote one.
        If the file did not change, we won't upload it again to prevent empty commits.

        For LFS files, the OID corresponds to the SHA256 of the file content (used a LFS ref).
        For regular files, the OID corresponds to the SHA1 of the file content.
        Note: this is slightly different to git OID computation since the oid of an LFS file is usually the git-SHA1 of the
              pointer file content (not the actual file content). However, using the SHA256 is enough to detect changes
              and more convenient client-side.
        Nr-   )rQ   rO   sha256hexro   r&   git_hashrr   rs   r<   r<   r=   
_local_oid
  s   


$zCommitOperationAdd._local_oidrV   N)F)r?   r@   rA   rB   rC   rD   r   r   r^   r   r   rO   r   rQ   r   
UploadModerR   r8   rS   rT   rU   r>   r   r   ro   rt   propertyrx   r<   r<   r<   r=   rK   |   s    
 
#1	rK   r1   rV   c                    s   |  dr| dd  } | dks| dks|  dr td|  d|  dr+| d	d  } tD ] t fd
d| dD rHtd  d|  dq-| S )Nr4   r   .z..z../z,Invalid `path_in_repo` in CommitOperation: ''z./   c                 3   s    | ]}| kV  qd S rJ   r<   ).0part	forbiddenr<   r=   	<genexpr>+  s    z)_validate_path_in_repo.<locals>.<genexpr>zHInvalid `path_in_repo` in CommitOperation: cannot update files under a 'z/' folder (path: 'z').)
startswithr9   r   anysplitr1   r<   r   r=   r5   "  s   

r5   
operationsc                 C   s   t t}| D ]X}|j}t|tr9|| dkrtd| d ||  d7  < t|jD ]}|t	|  d7  < q,t|t
r^|t	t| dkr^|jrUtd| d qtd| d qdS )a  
    Warn user when a list of operations is expected to overwrite itself in a single
    commit.

    Rules:
    - If a filepath is updated by multiple `CommitOperationAdd` operations, a warning
      message is triggered.
    - If a filepath is updated at least once by a `CommitOperationAdd` and then deleted
      by a `CommitOperationDelete`, a warning is triggered.
    - If a `CommitOperationDelete` deletes a filepath that is then updated by a
      `CommitOperationAdd`, no warning is triggered. This is usually useless (no need to
      delete before upload) but can happen if a user deletes an entire folder and then
      add new files to it.
    r   zBAbout to update multiple times the same file in the same commit: 'z9'. This can cause undesired inconsistencies in your repo.r   z_About to delete a folder containing files that have just been updated within the same commit: 'zLAbout to delete a file that have just been updated within the same commit: 'N)r   intr1   r7   rK   warningswarnr   parentsrC   r0   r3   )r   nb_additions_per_path	operationr1   parentr<   r<   r=   _warn_on_overwriting_operations6  s8   

r      )endpointnum_threadsrevision	additions	repo_typerepo_idheadersr   r   r   c              
      s  g }t | tdD ]+}tdd |D ||| dd\}	}
|
r/ddd |
D }td| ||	7 }qd	d
 | D g }|D ]}|ddu rWtd|d  j d q?|	| q?t
|dkrjtd dS d fdd}tjrtdt
| d t|ddD ]}|| qdS t
|dkrtd ||d  dS tdt
| d| d t||dt
| d|td dS ) a2  
    Uploads the content of `additions` to the Hub using the large file storage protocol.

    Relevant external documentation:
        - LFS Batch API: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md

    Args:
        additions (`List` of `CommitOperationAdd`):
            The files to be uploaded
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        headers (`Dict[str, str]`):
            Headers to use for the request, including authorization headers and user agent.
        num_threads (`int`, *optional*):
            The number of concurrent threads to use when uploading. Defaults to 5.
        revision (`str`, *optional*):
            The git revision to upload to.

    Raises:
        [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
            If an upload failed for any reason
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the server returns malformed responses
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
            If the LFS batch endpoint returned an HTTP error.
    
chunk_sizec                 S      g | ]}|j qS r<   )rO   r   opr<   r<   r=   
<listcomp>      z%_upload_lfs_files.<locals>.<listcomp>N)upload_infosr   r   r   r   r   token
c              	   S   s0   g | ]}d | d d| di  d qS )z$Encountered error for file with OID oidz: `errormessageget)r   errr<   r<   r=   r     s    "z$LFS batch endpoint returned errors:
c                 S   s   i | ]	}|j j |qS r<   )rO   ru   rv   )r   add_opr<   r<   r=   
<dictcomp>      z%_upload_lfs_files.<locals>.<dictcomp>actionszContent of file r   z/ is already present upstream - skipping upload.r   zNo LFS files to upload.rV   c              
      sP   z| d  }t ||  d W d S  ty' } z
td|j d|d }~ww )Nr   )r   lfs_batch_actionr   r   zError while uploading 'z' to the Hub.)r   	ExceptionRuntimeErrorr1   )batch_actionr   rg   r   r   	oid2addopr<   r=   _wrapped_lfs_upload  s   z._upload_lfs_files.<locals>._wrapped_lfs_uploadz
Uploading z* LFS files to the Hub using `hf_transfer`.zhuggingface_hub.lfs_upload)namer   zUploading 1 LFS file to the Hubz" LFS files to the Hub using up to z threads concurrentlyzUpload z
 LFS files)descmax_workers
tqdm_classry   )r!   UPLOAD_BATCH_MAX_NUM_FILESr   joinr9   r   loggerdebugr1   appendlenr   HF_HUB_ENABLE_HF_TRANSFERhf_tqdmr   )r   r   r   r   r   r   r   batch_actionschunkbatch_actions_chunkbatch_errors_chunkr   filtered_actionsactionr   r<   r   r=   _upload_lfs_filesd  sb   +





r   )r   r   	create_prr   c              
      s  t | dkrdS ddlm} zttj rddindd}W n tyA }	 z|	jjdkr<t	d d	 d
|	 d}	~	ww |j
}
|j|jf}dtttf f fdd}tt | t }tt|d }tt| tdD ]c\}}dd |D }dd |D }tdd |D }|dkrdt|d | d	| d}nd}t||ddddt d}|dtffdd}|||
||| W d   n1 sw   Y  qwdS )aQ  
    Uploads the content of `additions` to the Hub using the xet storage protocol.
    This chunks the files and deduplicates the chunks before uploading them to xetcas storage.

    Args:
        additions (`List` of `CommitOperationAdd`):
            The files to be uploaded.
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        headers (`Dict[str, str]`):
            Headers to use for the request, including authorization headers and user agent.
        endpoint: (`str`, *optional*):
            The endpoint to use for the xetcas service. Defaults to `constants.ENDPOINT`.
        revision (`str`, *optional*):
            The git revision to upload to.
        create_pr (`bool`, *optional*):
            Whether or not to create a Pull Request with that commit.

    Raises:
        [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
            If an upload failed for any reason.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the server returns malformed responses or if the user is unauthorized to upload to xet storage.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
            If the LFS batch endpoint returned an HTTP error.

    **How it works:**
        The file download system uses Xet storage, which is a content-addressable storage system that breaks files into chunks
            for efficient storage and transfer.

        `hf_xet.upload_files` manages uploading files by:
            - Taking a list of file paths to upload
            - Breaking files into smaller chunks for efficient storage
            - Avoiding duplicate storage by recognizing identical chunks across files
            - Connecting to a storage server (CAS server) that manages these chunks

        The upload process works like this:
        1. Create a local folder at ~/.cache/huggingface/xet/chunk-cache to store file chunks for reuse.
        2. Process files in parallel (up to 8 files at once):
            2.1. Read the file content.
            2.2. Split the file content into smaller chunks based on content patterns: each chunk gets a unique ID based on what's in it.
            2.3. For each chunk:
                - Check if it already exists in storage.
                - Skip uploading chunks that already exist.
            2.4. Group chunks into larger blocks for efficient transfer.
            2.5. Upload these blocks to the storage server.
            2.6. Create and upload information about how the file is structured.
        3. Return reference files that contain information about the uploaded files, which can be used later to download them.
    r   N)upload_filesr   1
token_typer   r   r   r   r   paramsi  z2You are unauthorized to upload to xet storage for r4   zX. Please check that you have configured your access token with write access to the repo.rV   c               	      s@   t tj rddind d} | d u rtd| j| jfS )Nr   r   r   zFailed to refresh xet token)r"   r    WRITEr   access_tokenexpiration_unix_epoch)new_xet_connection)r   r   r   r   r   r   r<   r=   token_refresher,  s   	z*_upload_xet_files.<locals>.token_refresherr   r   c                 S   s   g | ]}|qS r<   r<   r   r<   r<   r=   r   =  s    z%_upload_xet_files.<locals>.<listcomp>c                 S   s   g | ]}t |jqS r<   )rC   rL   r   r<   r<   r=   r   >  s    c                 S   s   g | ]}t j|qS r<   )rW   rX   getsize)r   rX   r<   r<   r=   r   ?  s    zUploading Batch [z]...zUploading...BTzhuggingface_hub.xet_put)r   totalinitialunit
unit_scaler   	log_level	incrementc                    s     |  d S rJ   )update)r   )progressr<   r=   update_progressP  s   z*_upload_xet_files.<locals>.update_progress)r   hf_xetr   r"   r    r   r   responsestatus_coder   r   r   r   r   rC   r   mathceilr   log10	enumerater!   sumzfillr*   r   getEffectiveLevel)r   r   r   r   r   r   r   r   xet_connection_infoexet_endpointaccess_token_infor   
num_chunksnum_chunks_num_digitsir   _chunkpathsexpected_sizedescriptionprogress_cmr   r<   )r   r   r   r   r   r   r   r=   _upload_xet_files  sd   ?
	$"	r   preupload_infoc                 C   sh   |  d}t|tstd|D ]!}t|tr-t| dtr-t| dtr-|d dv s1tdq| S )Nfilesz&preupload_info is improperly formattedrX   
uploadModer,   z'preupload_info is improperly formatted:)r   r7   listr9   dictrC   )r   r   	file_infor<   r<   r=   _validate_preupload_infoW  s   

r   Fgitignore_contentc              	   C   sB  |dur|nt j}i }i }	i }
t| dD ]e}ddd |D i}|dur'||d< t j| d| d| d	| |||r>d
dindd}t| t| }|jdi dd |d D  |	jdi dd |d D  |
jdi dd |d D  q| D ]}||j	 |_
|	|j	 |_|
|j	 |_q|| D ]}|jjdkrd|_
qdS )a  
    Requests the Hub "preupload" endpoint to determine whether each input file should be uploaded as a regular git blob,
    as a git LFS blob, or as a XET file. Input `additions` are mutated in-place with the upload mode.

    Args:
        additions (`Iterable` of :class:`CommitOperationAdd`):
            Iterable of :class:`CommitOperationAdd` describing the files to
            upload to the Hub.
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        headers (`Dict[str, str]`):
            Headers to use for the request, including authorization headers and user agent.
        revision (`str`):
            The git revision to upload the files to. Can be any valid git revision.
        gitignore_content (`str`, *optional*):
            The content of the `.gitignore` file to know which files should be ignored. The order of priority
            is to first check if `gitignore_content` is passed, then check if the `.gitignore` file is present
            in the list of files to commit and finally default to the `.gitignore` file already hosted on the Hub
            (if any).
    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the Hub API response is improperly formatted.
    Nr/   r   c                 S   s.   g | ]}|j t|jjd |jjdqS )ascii)rX   samplesize)r1   rp   rq   rO   r   decoder   r   r<   r<   r=   r     s    z'_fetch_upload_modes.<locals>.<listcomp>	gitIgnorez/api/zs/z/preupload/r   r   )jsonr   r   c                 S      i | ]	}|d  |d qS )rX   r   r<   r   rm   r<   r<   r=   r     r   z'_fetch_upload_modes.<locals>.<dictcomp>c                 S   r  )rX   shouldIgnorer<   r  r<   r<   r=   r     r   c                 S   s   i | ]
}|d  | dqS )rX   r   r   r  r<   r<   r=   r     s    r   r.   r<   )r   ENDPOINTr!   r#   postr$   r   r  r   r1   rQ   rR   rS   rO   r   )r   r   r   r   r   r   r   r   upload_modesshould_ignore_infooid_infor   payloadrespr   additionr<   r<   r=   _fetch_upload_modesf  s>   '
 r  copiesr+   c              
   C   s  ddl m}m} |||d}i }	i }
dd | D }tdt|tD ]#}|j||||t  ||d}|D ]}t||sC|j|
|j	|f< q4q!t
| dd	 d
D ]\}}t|}dd |D }tdt|tD ]T}|j||||t  |ps||d}|D ]>}t||rtd|j|
|j	|f< |jr||	|j	|f< qyt||||p||j	d}t j||d}t| |j|	|j	|f< qyqd|D ]*}|j|f|	vrtd|j d|p| d|
|j|jf|_|
|j|f|_qqM|	S )a;  
    Fetch information about the files to copy.

    For LFS files, we only need their metadata (file size and sha256) while for regular files
    we need to download the raw content from the Hub.

    Args:
        copies (`Iterable` of :class:`CommitOperationCopy`):
            Iterable of :class:`CommitOperationCopy` describing the files to
            copy on the Hub.
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        headers (`Dict[str, str]`):
            Headers to use for the request, including authorization headers and user agent.
        revision (`str`):
            The git revision to upload the files to. Can be any valid git revision.

    Returns: `Dict[Tuple[str, Optional[str]], Union[RepoFile, bytes]]]`
        Key is the file path and revision of the file to copy.
        Value is the raw content as bytes (for regular files) or the file information as a RepoFile (for LFS files).

    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the Hub API response is improperly formatted.
    r   )HfApi
RepoFolder)r   r   c                 S   r   r<   r   r   r<   r<   r=   r     r   z(_fetch_files_to_copy.<locals>.<listcomp>r   )r   r   r   r   c                 S   s   | j S rJ   )rG   )r   r<   r<   r=   <lambda>  s    z&_fetch_files_to_copy.<locals>.<lambda>)keyc                 S   r   r<   )rF   r   r<   r<   r=   r     r   z$Copying a folder is not implemented.)r   r   r   r   filename)r   zCannot copy z at revision z: file is missing on repo.)hf_apir  r  ranger   FETCH_LFS_BATCH_SIZEget_paths_infor7   blob_idrX   r   r   NotImplementedErrorr-   r   r#   r   r$   contentrF   r   rG   rH   r1   rI   )r  r   r   r   r   r   r  r  r  files_to_copyr	  
dest_pathsoffsetdest_repo_filesrm   rG   r   	src_pathssrc_repo_filessrc_repo_fileurlr   r   r<   r<   r=   _fetch_files_to_copy  sn   '


r#  r  commit_messagecommit_descriptionparent_commitc           	   	   c   s   |dur|nd}||d}|dur||d< d|dV  d}| D ]}t |tr9|jr9td|j d	 |d
7 }q t |trS|jdkrSd|  |jdddV  q t |trp|jdkrpd|jd|j	j
 |j	jddV  q t |tr|jrzdndd|jidV  q t |tr||j|jf }t |trdt| |jdddV  q |jrd|jd|jj
ddV  q tdtd| dt|dd |dkrtd| d dS dS )aG  
    Builds the payload to POST to the `/commit` API of the Hub.

    Payload is returned as an iterator so that it can be streamed as a ndjson in the
    POST request.

    For more information, see:
        - https://github.com/huggingface/huggingface_hub/issues/1085#issuecomment-1265208073
        - http://ndjson.org/
    N )summaryr   parentCommitheader)r  valuer   zSkipping file 'z(' in commit (ignored by gitignore file).r   r.   rm   rp   )r  rX   encodingr-   lfsFileru   )rX   algor   r   deletedFolderdeletedFilerX   )rX   r.  r   z_Malformed files_to_copy (should be raw file content as bytes or RepoFile objects with LFS info.z(Unknown operation to commit. Operation: z. Upload mode: rQ   zSkipped z/ file(s) in commit (ignored by gitignore file).)r7   rK   rR   r   r   r1   rQ   rt   r   rO   ru   rv   r   r0   r3   rE   rF   rG   r^   rp   rq   r-   r9   getattrinfo)	r   r  r$  r%  r&  header_valuenb_ignored_filesr   file_to_copyr<   r<   r=   _prepare_commit_payload!  st   


	








	
r6  )NFNrJ   )NN)UrB   rp   r\   r   rW   r   collectionsr   
contextlibr   dataclassesr   r   	itertoolsr   pathlibr   r   typingr	   r
   r   r   r   r   r   r   r   r   r   tqdm.contrib.concurrentr   r'  r   errorsr   r   r   r   file_downloadr   r-   r   r   r   utilsr   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r   
utils.tqdmr*   r  r+   
get_loggerr?   r   rz   r  r   r0   rE   rK   rC   r5   CommitOperationr   r   r   r8   r   r   r   r  r^   r#  r6  r<   r<   r<   r=   <module>   s   40
# &.
n
 
	T
i