import _operator
import itertools
from collections import defaultdict
from enum import Enum

import torch
from torch._subclasses.fake_tensor import FakeTensor, FakeTensorMode
from torch.fx import Node
from torch.fx._compatibility import compatibility
from torch.multiprocessing.reductions import StorageWeakRef
from torch.utils import _pytree as pytree
from torch.utils._pytree import tree_map_only

__all__ = ['reinplace']


class _ViewType(Enum):
    NonView = 0
    SingleOutputView = 1
    MultiOutputView = 2


def _is_view_op(tgt):
    if tgt is not None and isinstance(tgt, torch._ops.OpOverload):
        schema = tgt._schema
        if len(schema.arguments) > 0:
            first_arg = schema.arguments[0]
            # An op is a view if its first argument aliases the output without being written to.
            return first_arg.alias_info is not None and not first_arg.alias_info.is_write


def _get_view_type(tgt) -> _ViewType:
    if tgt is not None and isinstance(tgt, torch._ops.OpOverload):
        schema = tgt._schema
        if len(schema.arguments) > 0:
            first_arg = schema.arguments[0]
            if first_arg.alias_info is not None and not first_arg.alias_info.is_write:
                # A '*' in the alias set means the op returns multiple views of its
                # input (e.g. aten.split), as opposed to a single view (e.g. aten.diagonal).
                if '*' in first_arg.alias_info.after_set:
                    return _ViewType.MultiOutputView
                else:
                    return _ViewType.SingleOutputView
    return _ViewType.NonView


# Propagates functionalization-related metadata through the graph:
# - node.meta['fake_result']: the FakeTensor output of running the node
# - node.meta['node_idx']:    the node's index in the graph's node ordering
# - node.meta['view_of']:     for view nodes, the node that the view was taken from
@compatibility(is_backward_compatible=False)
class _FunctionalizationMetadataProp(torch.fx.Interpreter):

    def run_node(self, node: Node):
        self.node_counter += 1
        result = super().run_node(node)
        node.meta['fake_result'] = result
        node.meta['node_idx'] = self.node_counter

        # copy_() doesn't read from its first argument; it only overwrites it,
        # so that argument doesn't count as being "used as an input".
        node_args = node.args
        if node.target is torch.ops.aten.copy_.default:
            node_args = node_args[1:]

        # Track aliasing information about view tensor nodes.
        if node.op == 'call_function':
            view_type = _get_view_type(node.target)
            if view_type == _ViewType.SingleOutputView:
                assert isinstance(node.args[0], Node)
                node.meta['view_of'] = node.args[0]
            elif view_type == _ViewType.MultiOutputView:
                self.multi_output_view_nodes[node] = node.args[0]
            elif node.target is _operator.getitem:
                # Multi-output views show up as a view op followed by getitem calls;
                # map each getitem back to the base of the view.
                list_arg = node.args[0]
                maybe_base_of_view = self.multi_output_view_nodes.get(list_arg)
                if maybe_base_of_view is not None:
                    assert isinstance(maybe_base_of_view, Node)
                    node.meta['view_of'] = maybe_base_of_view

        if 'view_of' in node.meta:
            # Sanity check: a view and its base should share the same storage.
            assert isinstance(node.meta['fake_result'], FakeTensor)
            assert isinstance(node.meta['view_of'].meta['fake_result'], FakeTensor)
            view_storage = StorageWeakRef(node.meta['fake_result']._typed_storage())
            base_storage = StorageWeakRef(node.meta['view_of'].meta['fake_result']._typed_storage())
            assert view_storage == base_storage
        return result

    def propagate(self, *args):
        self.multi_output_view_nodes = {}
        self.node_counter = -1
        with FakeTensorMode() as mode:
            fake_args = [mode.from_tensor(a) if isinstance(a, torch.Tensor) else a for a in args]
            return super().run(*fake_args)


def _schemas_match(functional_schema, inplace_schema):
    names_match = inplace_schema.name.endswith("_") \
        and inplace_schema.name[:-1] == functional_schema.name
    arg_types_match = len(functional_schema.arguments) == len(inplace_schema.arguments) \
        and all(a1.type == a2.type for a1, a2 in zip(functional_schema.arguments, inplace_schema.arguments))
    # The inplace op should mutate its first argument, and nothing else.
    assert inplace_schema.arguments[0].alias_info is not None \
        and inplace_schema.arguments[0].alias_info.is_write
    assert all(a.alias_info is None for a in inplace_schema.arguments[1:])
    return names_match and arg_types_match


def _maybe_get_inplace_op(op):
    # Given a functional OpOverload, return its inplace variant (foo -> foo_),
    # or None if there isn't one with a matching schema.
    if not isinstance(op, torch._ops.OpOverload):
        return None
    # Some view ops have inplace variants (as_strided_, etc.),
    # but we do NOT want the reinplacing pass to add those into the program.
    if _is_view_op(op):
        return None
    op_namespace = op.__module__.split(".")[-1]
    op_base_name = op.overloadpacket.__name__
    maybe_namespace_module = getattr(torch.ops, op_namespace)
    maybe_inplace_op = None if maybe_namespace_module is None \
        else getattr(maybe_namespace_module, f'{op_base_name}_', None)
    if maybe_inplace_op is None:
        return None

    inplace_overloads = [
        getattr(maybe_inplace_op, overload_name) for overload_name in maybe_inplace_op.overloads()
    ]
    inplace_overloads_with_matching_schemas = [
        f for f in inplace_overloads if _schemas_match(op._schema, f._schema)
    ]
    # foo() and foo_() both existing doesn't guarantee their schemas are compatible.
    if len(inplace_overloads_with_matching_schemas) == 0:
        return None
    assert len(inplace_overloads_with_matching_schemas) == 1
    inplace_op = inplace_overloads_with_matching_schemas[0]
    return inplace_op


_VIEW_INVERSE_MAP = {
    torch.ops.aten.diagonal_scatter.default: torch.ops.aten.diagonal.default,
    torch.ops.aten.select_scatter.default: torch.ops.aten.select.int,
    torch.ops.aten.slice_scatter.default: torch.ops.aten.slice.Tensor,
    torch.ops.aten.as_strided_scatter.default: torch.ops.aten.as_strided.default,
}


def _get_all_later_node_usages(tensor_aliases: set[Node], op_index: int):
    # Returns all nodes that use any of the given aliases *after* op_index
    # in the node ordering (ignoring intermediate view ops).
    def _add_if_tensor(x, set_):
        if isinstance(x, FakeTensor):
            set_.add(StorageWeakRef(x._typed_storage()))

    nodes_used_after = set()
    for t in tensor_aliases:
        usage_nodes = t.users
        for n in usage_nodes:
            # We only care about usages after the current node.
            if 'node_idx' not in n.meta or n.meta['node_idx'] <= op_index:
                continue
            # Intermediate view ops don't count; they only matter if their
            # output is then used elsewhere.
            if n in tensor_aliases:
                if isinstance(n.target, torch._ops.OpOverload) or n.target == _operator.getitem:
                    continue
            nodes_used_after.add(n)
    return nodes_used_after


def _get_view_inverse_node_usages(later_node_usages: set[Node], self_aliases: set[Node]) -> set[Node]:
    # Returns the subset of later usages that are {view}_scatter ops which exactly
    # "invert" a view taken from one of self's aliases (and can therefore be deleted).
    def matching_view_metadata(a, b):
        return a.size() == b.size() \
            and a.stride() == b.stride() \
            and a.storage_offset() == b.storage_offset()

    view_inverse_nodes = set()
    # Go through the nodes in order, so we can see chains of view_scatter ops.
    for n in sorted(later_node_usages, key=lambda x: x.meta['node_idx']):
        if n.target not in _VIEW_INVERSE_MAP:
            continue
        base = n.args[0]
        mutated_view = n.args[1]
        assert isinstance(base, Node)
        assert isinstance(base.meta['fake_result'], FakeTensor)
        assert isinstance(mutated_view, Node)
        assert isinstance(mutated_view.meta['fake_result'], FakeTensor)
        original_view = _VIEW_INVERSE_MAP[n.target]
        for self_alias in self_aliases:
            # Look for an alias that was created as `alias = view(base, args...)`
            # such that the current scatter op inverts that view call.
            # Re-running the view with the scatter op's args and comparing the
            # resulting metadata tells us whether that's the case.
            if 'view_of' not in self_alias.meta:
                continue
            self_alias_base = self_alias.meta['view_of']
            try:
                view_replay_metadata = original_view(
                    self_alias_base.meta['fake_result'], *n.args[2:], **n.kwargs)
                expected_metadata = self_alias.meta['fake_result']
                if matching_view_metadata(self_alias_base.meta['fake_result'], base.meta['fake_result']) \
                        and matching_view_metadata(view_replay_metadata, expected_metadata):
                    view_inverse_nodes.add(n)
            except Exception:
                continue

    return view_inverse_nodes


@compatibility(is_backward_compatible=True)
def reinplace(gm, *sample_args):
    """
    Given an fx.GraphModule, modifies it to perform "reinplacing",
    mutating the nodes of the graph.
    We look for out-of-place op call sites like `b = a.add(...)`,
    and convert them to be inplace (`b = a.add_(...)`),
    as long as the input to the current operator ("a") isn't re-used
    anywhere later in the graph.

    This pass currently expects to operate on a **functional, ATen** graph.
    This can be obtained by running `make_fx(functionalize(f))`.

    Sample inputs are needed to determine aliasing relationships of the inputs.
    In general, we can't reinplace node `b = a.add(...)` if "a" aliases any of the
    inputs to the program.
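
    A minimal sketch of typical usage (the function `f` and its sample input are
    made up for illustration; `functionalize` and `make_fx` live in torch.func and
    torch.fx.experimental.proxy_tensor respectively):

        from torch.fx.experimental.proxy_tensor import make_fx
        from torch.func import functionalize

        def f(x):
            y = x.clone()
            y.add_(1)
            return y

        x = torch.ones(4)
        gm = make_fx(functionalize(f))(x)
        gm = reinplace(gm, x)  # the functional add() in the graph can now become add_()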

    Given a node "b = foo(a, args...)", the algorithm for re-inplacing is as follows:

    (1) Perform some initial checks on the metadata of "a" and "args..."
        that can disqualify them from being reinplaced.

      (1a) Check that the self argument we're attempting to reinplace
           has acceptable dtype/size metadata to reinplace with.

           For example, if we have:
             a = torch.ones(1)
             b = torch.ones(10)
             out = torch.add(a, b)
           We can't turn that into
             a.add_(b)
           Because that would require resizing "a".

           Similarly, we can't convert torch.ge(a, b) into a.ge_(b),
           because that would require changing a's dtype (from e.g. float32 to bool).
           Note that in this specific example, we could technically do better..

           If we see the pattern:
             a_1 = a.ge(b)
             a_2 = aten._to_copy(a_1, a.dtype)
           Then this should be valid to completely re-inplace
           (this is exactly what functionalization will emit when it sees a.ge_(b)).

           This optimization is only really important for user programs
           that directly use inplace comparison ops though.

           We also cannot re-inplace on tensors that have overlapping memory,
           e.g. torch.ones(1).expand(4, 4).add_(1)
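
           Roughly, the metadata check boils down to something like the following
           sketch (names here are illustrative, not the pass's exact code):

             def _metadata_is_compatible(self_meta, out_meta):
                 return (self_meta.numel() == out_meta.numel()
                         and self_meta.dtype == out_meta.dtype
                         # reject internal overlap, e.g. torch.ones(1).expand(4, 4)
                         and torch._debug_has_internal_overlap(self_meta) != 1)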

      (1b) Check if "a" is an alias of any of the program inputs.

          If it is, skip and move to the next node.
          Inplace'ing an op that would cause it to mutate a program input is not sound,
          because that would be a side effect visible to the user.

          NOTE: there's a future optimization that we should make:
          if "a" is a (alias of a)  program input, but later in the program
          there is a node that looks like "a.copy_(...)",
          Then re-inplacing is ok to do - we are temporarily re-using a's buffer,
          which will later be overwritten by the copy_() call.

          This will be an important optimization to have for programs that mutate
          their inputs. It currently isn't implemented though.

      (1c) Check if "a" and "args..." alias

          For example, re-inplacing to create code like the below
          isn't guaranteed to be sound:

            aten.mul_(a, a)

    (2) Check that "a" and all of its outstanding aliases are not used anywhere
        later in the graph. If this is the case, then it's safe to re-inplace
        to "b = foo_(a)".

        There are a few caveats to this, explained in more detail below:
        (a) If "a" is used later as an argument to a view op, that is okay.
            It's only a problem if "a" (or that view) is later passed
            into a normal operator, or if it is returned as the program output.
        (b) If "a" is a repeat argument in `foo()`, then don't reinplace.
            Most ATen kernels don't make any guarantees that this is sound,
            e.g. if you do aten.mul_(a, a).
            So we'll just ban re-inplacing in this case.
        (c) If "a" is used as an input into a view "inverse" / "scatter"
            operator, it is potentially fine to re-inplace
            (and remove that scatter operator from the graph).
            See below for a more detailed example.

        NOTE: there is an optimization in this step that is crucial
        to fully recovering performance from functionalization.

        Given this program:
        def f(x):
            a = torch.ops.aten.add(x, x)
            b = torch.ops.aten.diagonal(a)
            torch.ops.aten.fill_(b, 0)
            return a

        Functionalization will emit the following:
        def f(x):
            a = torch.ops.aten.add(x, x)
            b = torch.ops.aten.diagonal(a, 0, 1)
            b_updated = torch.ops.aten.fill(b, 0)
            a_updated = torch.ops.aten.diagonal_scatter(a, b_updated, 0, 1)
            return a_updated

        Ordinarily, we would not be able to reinplace the fill,
        because "b" aliases with "a" which is used by the diagonal_scatter call.

        "re-inplacing" is on the hook for figuring out that it is ok to
        completely remove the expensive diagonal_scatter call, if we re-inplace the add().

        So, for every `alias in alias_set(a)`, instead of checking
        that "alias" is not used anywhere later in the graph,
        we check that
            EITHER:
          (a) alias is not used anywhere later in the graph
            OR:
          (b) alias is used exactly once later on in the graph,
              in the following op:

                out = foo_scatter(alias, x, args...)

              where the following must hold:
                (i) "foo_scatter" is the "inverse" operator for foo.
                    This only applies to "foo" ops that are view operators,
                    which view into a subset of the original tensor's memory.
                    In practice, there are ~4 operators where this applies:
                      diagonal -> diagonal_scatter
                      slice -> slice_scatter
                      select -> select_scatter
                      as_strided -> as_strided_scatter
                (ii) "args..." are the same between the foo() and foo_scatter() calls.

    (3) Perform the actual re-inplacing on foo!

      (3b) is the common case, but special care is needed for {view}_scatter (3a)

      (3a) {view}_scatter ops.

        Consider this program:
          a = torch.zeros(2, 2)
          b = torch.ones(2)
          a[0] = b

        Post functionalization, that will look like:
          a = torch.zeros(2, 2)
          b = torch.ones(2)
          a_updated = torch.select_scatter(a, b, 0, 0)

        In this case though, there is no "functional" op to re-inplace!
        Instead, we'd like to directly remove the select_scatter call.
        We already know from (3) that this is valid,
        because "a" has no later usages in the graph.

        We perform the re-inplacing on the {view}_scatter op like so
        Before:
          a_updated = torch.select_scatter(a, b, args...)
        After:
          a_slice = a.select(args...)
          a_slice.copy_(b)

      (3b) Otherwise, replace the functional op with its inplace variant.
        Before:
          b = foo(a, args...)
        After:
          a.foo_(args...)

    (4) Finally, after converting either:
          Before:
            b = foo(a)
          After:
            foo_(a)
        or
          Before:
            b = {slice}_scatter(a, mutated_slice, args...)
          After:
            slice = {slice}(a, args...)
            slice.copy_(mutated_slice)

        We now need to find all later nodes that use "b" as an argument
        and update them to take in "a" instead.

        Note that for the majority of inplace ops, this isn't actually necessary
        (because most inplace ops return "self" as their output).
        This isn't generally true for all mutable ops though, which is why
        we need to actually replace all of the arguments.

        We also need to update our metadata of Dict[StorageWeakRef, Set[Node]],
        which maps a given tensor storage to the set of all nodes that take in that storage
        as an input.
        Specifically, re-inplacing `b = foo(a)` causes "a" and "b"'s sets to get fused
        together.
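
        In code, the fusion is roughly (a sketch using this file's storage_to_nodes
        map; "a" and "b" stand for the FX nodes involved):

          a_storage = StorageWeakRef(a.meta['fake_result']._typed_storage())
          b_storage = StorageWeakRef(b.meta['fake_result']._typed_storage())
          storage_to_nodes[a_storage].update(storage_to_nodes[b_storage])
          storage_to_nodes[b_storage].update(storage_to_nodes[a_storage])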

    (5) Any "view_inverse/scatter" nodes that were identified as "it's ok to ignore them"
        during step (3) get manually deleted from the graph.
        Their outputs are no longer used, so technically standard DCE would be able
        to do this, but we can no longer run FX's DCE pass now that we have mutable
        ops in the graph.
    """
    _FunctionalizationMetadataProp(gm).propagate(*sample_args)

    # Inputs (and their aliases) must never be re-inplaced, since mutating them
    # would be a side effect visible to the caller.
    input_storages = {
        StorageWeakRef(node.meta['fake_result']._typed_storage())
        for node in gm.graph.nodes
        if node.op == 'placeholder' and isinstance(node.meta['fake_result'], torch.Tensor)
    }

    # For every tensor storage, the set of nodes whose outputs live in that storage.
    storage_to_nodes: dict[StorageWeakRef, set[Node]] = defaultdict(set)
    for n in gm.graph.nodes:
        if 'fake_result' in n.meta:
            # Tree-map because some ops can return lists of tensors.
            def _add_to_map(x):
                if isinstance(x, FakeTensor):
                    storage_to_nodes[StorageWeakRef(x._typed_storage())].add(n)
            pytree.tree_map_(_add_to_map, n.meta['fake_result'])

    all_later_view_inverse_nodes_to_delete = set()
    for node in gm.graph.nodes:
        if node.op != 'call_function':
            continue
        # The pass only acts on ATen ops whose first argument is a tensor.
        if not isinstance(node.target, torch._ops.OpOverload):
            continue
        if len(node.target._schema.arguments) < 1:
            continue
        if type(node.target._schema.arguments[0].type) != torch.TensorType:
            continue

        # Step 1a: the self argument must have metadata compatible with the output.
        self_arg = node.args[0]
        self_flattened = pytree.tree_leaves(self_arg.meta['fake_result'])
        node_flattened = pytree.tree_leaves(node.meta['fake_result'])
        self_has_wrong_metadata = False
        if len(self_flattened) == len(node_flattened):
            for self_meta, node_meta in zip(self_flattened, node_flattened):
                if self_meta.numel() != node_meta.numel():
                    self_has_wrong_metadata = True
                if self_meta.dtype != node_meta.dtype:
                    self_has_wrong_metadata = True
                # We also cannot re-inplace on tensors with internal memory overlap,
                # e.g. torch.ones(1).expand(4, 4).
                if torch._debug_has_internal_overlap(self_meta) == 1:
                    self_has_wrong_metadata = True
        # a.resize(b) is optimistically assumed to be safe to re-inplace.
        if self_has_wrong_metadata and node.target != torch.ops.aten.resize.default:
            continue

        # Step 1b: don't re-inplace ops whose self argument aliases a program input.
        self_arg_storage = StorageWeakRef(self_arg.meta['fake_result']._typed_storage())
        if self_arg_storage in input_storages:
            continue
        # Step 1c: don't re-inplace if the self argument is repeated, e.g. aten.mul(a, a).
        if len([x for x in node.args if x is self_arg]) > 1:
            continue

        self_aliases = storage_to_nodes[self_arg_storage]
        later_node_usages = _get_all_later_node_usages(self_aliases, node.meta['node_idx'])
        later_view_inverse_node_usages = _get_view_inverse_node_usages(later_node_usages, self_aliases)

        # Step 2: it's only safe to re-inplace if no alias of the self argument is used
        # later on (ignoring {view}_scatter nodes that we're allowed to delete).
        can_reinplace = len(later_node_usages - later_view_inverse_node_usages) == 0
        if not can_reinplace:
            continue

        if node.target in _VIEW_INVERSE_MAP and node not in all_later_view_inverse_nodes_to_delete:
            # Step 3a: replace `b = {view}_scatter(a, mutated_slice, args...)` with
            # `slice = {view}(a, args...); slice.copy_(mutated_slice)`.
            view_op = _VIEW_INVERSE_MAP[node.target]
            with gm.graph.inserting_before(node):
                mutated_slice_node = node.args[1]
                remaining_slice_args = node.args[2:]
                slice_node = gm.graph.create_node(
                    'call_function', view_op, (self_arg,) + tuple(remaining_slice_args), node.kwargs)
                gm.graph.create_node(
                    'call_function', torch.ops.aten.copy_.default, (slice_node, mutated_slice_node), {})
            # The {view}_scatter node itself gets deleted at the end of the pass.
            all_later_view_inverse_nodes_to_delete.add(node)
        else:
            # Step 3b: swap the functional op for its inplace variant, if one exists.
            maybe_inplace_op = _maybe_get_inplace_op(node.target)
            if maybe_inplace_op is None:
                continue
            node.target = maybe_inplace_op

        # The self argument and the node's output now share a storage:
        # fuse their entries in storage_to_nodes.
        curr_node_storage = StorageWeakRef(node.meta['fake_result']._typed_storage())
        storage_to_nodes[self_arg_storage].update(storage_to_nodes[curr_node_storage])
        storage_to_nodes[curr_node_storage].update(storage_to_nodes[self_arg_storage])

        # Step 4: replace later usages of the node's output (and of any deleted
        # {view}_scatter outputs) with the self argument.
        for old in itertools.chain([node], later_view_inverse_node_usages):
            new = old.args[0]
            nodes_to_update = [n for n in old.users if n.meta['node_idx'] > node.meta['node_idx']]
            for node_to_update in nodes_to_update:
                def replace_arg(a):
                    return new if a == old else a
                node_to_update.args = tree_map_only(Node, replace_arg, node_to_update.args)
                node_to_update.kwargs = tree_map_only(Node, replace_arg, node_to_update.kwargs)

                # Keep storage_to_nodes up to date for view ops whose base was swapped.
                old_flattened_res = pytree.tree_leaves(old.meta['fake_result'])
                node_flattened_res = pytree.tree_leaves(node_to_update.meta['fake_result'])
                old_res_storage = {
                    StorageWeakRef(x._typed_storage()) for x in old_flattened_res if isinstance(x, FakeTensor)}
                node_res_storage = {
                    StorageWeakRef(x._typed_storage()) for x in node_flattened_res if isinstance(x, FakeTensor)}
                if len(old_res_storage) == 1 and len(node_res_storage) == 1 and old_res_storage == node_res_storage:
                    new_flattened_res = pytree.tree_leaves(new.meta['fake_result'])
                    new_res_storage = {
                        StorageWeakRef(x._typed_storage()) for x in new_flattened_res if isinstance(x, FakeTensor)}
                    assert len(new_res_storage) == 1
                    (old_ref,) = old_res_storage
                    (new_ref,) = new_res_storage
                    (node_ref,) = node_res_storage
                    storage_to_nodes[node_ref].update(storage_to_nodes[new_ref])
                    storage_to_nodes[new_ref].update(storage_to_nodes[node_ref])

    # Step 5: delete the {view}_scatter nodes that we de-functionalized,
    # only after all other modifications to the graph are finished.
    for to_delete in all_later_view_inverse_nodes_to_delete:
        gm.graph.erase_node(to_delete)

    gm.recompile()
    return gm