o
    hW9                     @   s   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZ d dlmZ d dlmZmZ d dlmZmZmZmZ dd	lmZmZmZmZ dd
lmZ eeZeddG dd dZ G dd dZ!G dd deZ"dS )    N)Future)	dataclass)SEEK_ENDSEEK_SETBytesIO)Path)LockThread)DictListOptionalUnion   )DEFAULT_IGNORE_PATTERNS
CommitInfoCommitOperationAddHfApi)filter_repo_objectsT)frozenc                   @   s2   e Zd ZU dZeed< eed< eed< eed< dS )_FileToUploadzWTemporary dataclass to store info about files to upload. Not meant to be used directly.
local_pathpath_in_repo
size_limitlast_modifiedN)	__name__
__module____qualname____doc__r   __annotations__strintfloat r"   r"   u/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/huggingface_hub/_commit_scheduler.pyr      s   
 r   c                   @   s  e Zd ZdZddddddddddd
dedeeef deeef d	e	e d
e	e de	e de	e
 de	e de	eee ef  de	eee ef  de
de	d ddfddZd$ddZd%ddZd$ddZd$ddZdefddZde	e fd d!Zde	e fd"d#ZdS )&CommitSchedulera  
    Scheduler to upload a local folder to the Hub at regular intervals (e.g. push to hub every 5 minutes).

    The recommended way to use the scheduler is to use it as a context manager. This ensures that the scheduler is
    properly stopped and the last commit is triggered when the script ends. The scheduler can also be stopped manually
    with the `stop` method. Checkout the [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#scheduled-uploads)
    to learn more about how to use it.

    Args:
        repo_id (`str`):
            The id of the repo to commit to.
        folder_path (`str` or `Path`):
            Path to the local folder to upload regularly.
        every (`int` or `float`, *optional*):
            The number of minutes between each commit. Defaults to 5 minutes.
        path_in_repo (`str`, *optional*):
            Relative path of the directory in the repo, for example: `"checkpoints/"`. Defaults to the root folder
            of the repository.
        repo_type (`str`, *optional*):
            The type of the repo to commit to. Defaults to `model`.
        revision (`str`, *optional*):
            The revision of the repo to commit to. Defaults to `main`.
        private (`bool`, *optional*):
            Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
        token (`str`, *optional*):
            The token to use to commit to the repo. Defaults to the token saved on the machine.
        allow_patterns (`List[str]` or `str`, *optional*):
            If provided, only files matching at least one pattern are uploaded.
        ignore_patterns (`List[str]` or `str`, *optional*):
            If provided, files matching any of the patterns are not uploaded.
        squash_history (`bool`, *optional*):
            Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
            useful to avoid degraded performances on the repo when it grows too large.
        hf_api (`HfApi`, *optional*):
            The [`HfApi`] client to use to commit to the Hub. Can be set with custom settings (user agent, token,...).

    Example:
    ```py
    >>> from pathlib import Path
    >>> from huggingface_hub import CommitScheduler

    # Scheduler uploads every 10 minutes
    >>> csv_path = Path("watched_folder/data.csv")
    >>> CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path=csv_path.parent, every=10)

    >>> with csv_path.open("a") as f:
    ...     f.write("first line")

    # Some time later (...)
    >>> with csv_path.open("a") as f:
    ...     f.write("second line")
    ```

    Example using a context manager:
    ```py
    >>> from pathlib import Path
    >>> from huggingface_hub import CommitScheduler

    >>> with CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path="watched_folder", every=10) as scheduler:
    ...     csv_path = Path("watched_folder/data.csv")
    ...     with csv_path.open("a") as f:
    ...         f.write("first line")
    ...     (...)
    ...     with csv_path.open("a") as f:
    ...         f.write("second line")

    # Scheduler is now stopped and last commit have been triggered
    ```
       NF)
everyr   	repo_typerevisionprivatetokenallow_patternsignore_patternssquash_historyhf_apirepo_idfolder_pathr&   r   r'   r(   r)   r*   r+   r,   r-   r.   r   returnc             	   C   s:  |pt |d| _t|  | _|pd| _|	| _|
d u r g }
nt|
t	r(|
g}
|
t
 | _| j r;td| j d| jjddd | jj|||dd}|j| _|| _|| _|| _i | _|dksitd	| dt | _|| _|| _td
| j d| j d| j d t| jdd| _| j  t | j! d| _"d S )N)r*    z0'folder_path' must be a directory, not a file: ''.T)parentsexist_ok)r/   r)   r'   r5   r   z)'every' must be a positive integer, not 'zScheduled job to push 'z' to 'z' every z	 minutes.)targetdaemonF)#r   apir   
expanduserresolver0   r   r+   
isinstancer   r   r,   is_file
ValueErrormkdircreate_repor/   r'   r(   r*   last_uploadedr   lockr&   r-   loggerinfor	   _run_scheduler_scheduler_threadstartatexitregister_push_to_hub_CommitScheduler__stopped)selfr/   r0   r&   r   r'   r(   r)   r*   r+   r,   r-   r.   repo_urlr"   r"   r#   __init__d   s8   



$

zCommitScheduler.__init__c                 C   s
   d| _ dS )ziStop the scheduler.

        A stopped scheduler cannot be restarted. Mostly for tests purposes.
        TN)rJ   rK   r"   r"   r#   stop   s   
zCommitScheduler.stopc                 C   s   | S Nr"   rN   r"   r"   r#   	__enter__   s   zCommitScheduler.__enter__c                 C   s   |     |   d S rP   )triggerresultrO   )rK   exc_type	exc_value	tracebackr"   r"   r#   __exit__   s   zCommitScheduler.__exit__c                 C   s(   	 |   | _t| jd  | jrdS q)z7Dumb thread waiting between each scheduled push to Hub.T<   N)rR   last_futuretimesleepr&   rJ   rN   r"   r"   r#   rD      s   
zCommitScheduler._run_schedulerc                 C   s   | j | jS )zTrigger a `push_to_hub` and return a future.

        This method is automatically called every `every` minutes. You can also call it manually to trigger a commit
        immediately, without waiting for the next scheduled commit.
        )r8   run_as_futurerI   rN   r"   r"   r#   rR      s   zCommitScheduler.triggerc              
   C   sv   | j rd S td z|  }| jr#td | jj| j| j| j	d |W S  t
y: } z	td|   d }~ww )Nz((Background) scheduled commit triggered.z$(Background) squashing repo history.)r/   r'   branchzError while pushing to Hub: )rJ   rB   rC   push_to_hubr-   r8   super_squash_historyr/   r'   r(   	Exceptionerror)rK   valueer"   r"   r#   rI      s   

zCommitScheduler._push_to_hubc           
   
      sL   j a td  fddt jdD } jr$ jd dnd}g }t|	  j
 jdD ])}|| }| } j|du sM j| |jkr\|t||| |j|jd	 q3W d   n1 sgw   Y  t|d
krytd dS td dd |D }td  jj j j|d jd}|D ]	}	|	j j|	j< q|S )a  
        Push folder to the Hub and return the commit info.

        <Tip warning={true}>

        This method is not meant to be called directly. It is run in the background by the scheduler, respecting a
        queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency
        issues.

        </Tip>

        The default behavior of `push_to_hub` is to assume an append-only folder. It lists all files in the folder and
        uploads only changed files. If no changes are found, the method returns without committing anything. If you want
        to change this behavior, you can inherit from [`CommitScheduler`] and override this method. This can be useful
        for example to compress data together in a single file before committing. For more details and examples, check
        out our [integration guide](https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#scheduled-uploads).
        z-Listing files to upload for scheduled commit.c                    s&   i | ]}|  r| j |qS r"   )r<   relative_tor0   as_posix).0pathrN   r"   r#   
<dictcomp>   s    z/CommitScheduler.push_to_hub.<locals>.<dictcomp>z**/*/r2   )r+   r,   N)r   r   r   r   r   z4Dropping schedule commit: no changed file to upload.z9Removing unchanged files since previous scheduled commit.c                 S   s&   g | ]}t t|j|jd |jdqS ))r   )path_or_fileobjr   )r   PartialFileIOr   r   r   )rf   file_to_uploadr"   r"   r#   
<listcomp>  s    z/CommitScheduler.push_to_hub.<locals>.<listcomp>z%Uploading files for scheduled commit.zScheduled Commit)r/   r'   
operationscommit_messager(   )rA   rB   debugsortedr0   globr   stripr   keysr+   r,   statr@   getst_mtimeappendr   st_sizelenr8   create_commitr/   r'   r(   r   r   )
rK   relpath_to_abspathprefixfiles_to_uploadrelpathr   ru   add_operationscommit_infofiler"   rN   r#   r^      sT   


 



	zCommitScheduler.push_to_hubr1   N)r1   r$   )r   r   r   r   r   r   r   r    r!   r   boolr   rM   rO   rQ   rW   rD   r   rR   r   rI   r^   r"   r"   r"   r#   r$      s\    K

	


9


r$   c                       s   e Zd ZdZdeeef deddfddZd fdd	Z	defd
dZ
defddZdef fddZdefddZefdededefddZddee defddZ  ZS )rk   a  A file-like object that reads only the first part of a file.

    Useful to upload a file to the Hub when the user might still be appending data to it. Only the first part of the
    file is uploaded (i.e. the part that was available when the filesystem was first scanned).

    In practice, only used internally by the CommitScheduler to regularly push a folder to the Hub with minimal
    disturbance for the user. The object is passed to `CommitOperationAdd`.

    Only supports `read`, `tell` and `seek` methods.

    Args:
        file_path (`str` or `Path`):
            Path to the file to read.
        size_limit (`int`):
            The maximum number of bytes to read from the file. If the file is larger than this, only the first part
            will be read (and uploaded).
    	file_pathr   r1   Nc                 C   s6   t || _| jd| _t|t| j j| _	d S )Nrb)
r   
_file_pathopen_fileminosfstatfilenory   _size_limit)rK   r   r   r"   r"   r#   rM   .  s   
zPartialFileIO.__init__c                    s   | j   t  S rP   )r   closesuper__del__rN   	__class__r"   r#   r   3  s   

zPartialFileIO.__del__c                 C   s   d| j  d| j dS )Nz<PartialFileIO file_path=z size_limit=>)r   r   rN   r"   r"   r#   __repr__7  s   zPartialFileIO.__repr__c                 C   s   | j S rP   )r   rN   r"   r"   r#   __len__:  s   zPartialFileIO.__len__namec                    s.   | ds	|dv rt |S td| d)N_)readtellseekz PartialFileIO does not support 'r3   )
startswithr   __getattribute__NotImplementedError)rK   r   r   r"   r#   r   =  s   zPartialFileIO.__getattribute__c                 C   s
   | j  S )z!Return the current file position.)r   r   rN   r"   r"   r#   r   B  s   
zPartialFileIO.tell_PartialFileIO__offset_PartialFileIO__whencec                 C   sB   |t krt| | }t}| j||}|| jkr| j| jS |S )zChange the stream position to the given offset.

        Behavior is the same as a regular file, except that the position is capped to the size limit.
        )r   rz   r   r   r   r   )rK   r   r   posr"   r"   r#   r   F  s   
zPartialFileIO.seek_PartialFileIO__sizec                 C   sB   | j  }|du s|dk r| j| }nt|| j| }| j |S )zRead at most `__size` bytes from the file.

        Behavior is the same as a regular file, except that it is capped to the size limit.
        Nr   )r   r   r   r   r   )rK   r   currenttruncated_sizer"   r"   r#   r   U  s
   
zPartialFileIO.readr   )r   )r   r   r   r   r   r   r   r    rM   r   r   r   r   r   r   r   r   bytesr   __classcell__r"   r"   r   r#   rk     s     rk   )#rG   loggingr   rZ   concurrent.futuresr   dataclassesr   ior   r   r   pathlibr   	threadingr   r	   typingr
   r   r   r   r.   r   r   r   r   utilsr   	getLoggerr   rB   r   r$   rk   r"   r"   r"   r#   <module>   s$    
	 