o
    h[                     @   sJ   d dl mZ d dlmZ d dlmZ d dlmZmZ dede	fddZ
d	S )
    )Path)Any)TikTokenConverter)TIKTOKEN_VOCAB_FILETOKENIZER_FILEencoding
output_dirc           	      C   s   t |}|jdd |d t }|t }t| }t| }zddlm} ddlm	} t
| tr6|| } || j| W n tyH   tdw t|| j| jd }|| d	S )
a  
    Converts given `tiktoken` encoding to `PretrainedTokenizerFast` and saves the configuration of converted tokenizer
    on disk.

    Args:
        encoding (`str` or `tiktoken.Encoding`):
            Tokenizer from `tiktoken` library. If `encoding` is `str`, the tokenizer will be loaded with
            `tiktoken.get_encoding(encoding)`.
        output_dir (`str`):
            Save path for converted tokenizer configuration file.
    T)exist_oktiktokenr   )get_encoding)dump_tiktoken_bpezY`tiktoken` is required to save a `tiktoken` file. Install it with `pip install tiktoken`.)
vocab_filepatternadditional_special_tokensN)r   mkdirr   r   strabsoluter
   r   tiktoken.loadr   
isinstance_mergeable_ranksImportError
ValueErrorr   _pat_str_special_tokens	convertedsave)	r   r   	save_filetokenizer_filesave_file_absoluteoutput_file_absoluter   r   	tokenizer r!   v/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/transformers/integrations/tiktoken.pyconvert_tiktoken_to_fast   s*   

r#   N)pathlibr   typingr   #transformers.convert_slow_tokenizerr   $transformers.tokenization_utils_fastr   r   r   r#   r!   r!   r!   r"   <module>   s
    