
    /_i$                        d dl Z d dlZd dlZd dlZd dlZ	 d dlZd dlZd dl	m
Z
 d dlZd dlmZ d dlmZmZmZmZmZmZ d dlmZ d dlmZ 	 d dlmZ d dlmZm Z m!Z!m"Z" 	 d dl#Z$d dl%m&Z' ejP                  j8                  ejP                  jR                  gZ*d Z+ejP                  jF                  d        Z,ejP                  jF                  d        Z-ejP                  jF                  d	        Z.ejP                  jF                  d
        Z/ejP                  ja                  e1e2fd      ejP                  jF                  d               Z3ejP                  jF                  d        Z4ejP                  jF                  d        Z5ejP                  jF                  d        Z6ejP                  jF                  d        Z7ejP                  jF                  d        Z8ejP                  jF                  ejP                  js                  ddgdgg ejt                  d      dk   ejt                  dd      dk   ejt                  dd      jw                   ejx                               dk  f      ejP                  js                  dd      d                      Z=ejP                  jF                  d        Z>ejP                  j~                  d        Z@ejP                  j~                  d        ZAejP                  jF                  d         ZBejP                  jF                  ejP                  j~                  d!               ZCd" ZDd# ZEd$ ZFejP                  jF                  d%        ZGejP                  jF                  d&        ZHejP                  jF                  d'        ZIejP                  j
                  d(        ZJejP                  j
                  d)        ZKejP                  j
                  d*        ZLdRd+ZMd, ZNejP                  j
                  ejP                  js                  d-d.d/g      d0               ZOejP                  j
                  d1        ZPejP                  j
                  d2        ZQejP                  j
                  ejP                  js                  d-d.d/g      d3               ZRd4 ZSd5 ZT	 	 	 dSd6ZU	 dTd7ZVejP                  jF                  d8        ZWejP                  jF                  d9        ZXejP                  jF                  d:        ZYejP                  jF                  d;        ZZejP                  jF                  d<        Z[ejP                  jF                  ejP                  j~                  d=               Z\ejP                  jF                  ejP                  j~                  ejP                  j                  ej                  d>k(  d?@      dA                      Z_ejP                  jF                  ejP                  j~                  dB               Z`ejP                  jF                  dC        ZadUdDZbejP                  jF                  dE        ZcejP                  jF                  dF        ZddG ZedH ZfdI ZgdJ ZhdK ZidL ZjdM ZkejP                  js                  dNdO      dP        ZlejP                  jF                  dQ        Zmy# e$ r dZY w xY w# e$ r dZY lw xY w# e$ r dxZ$Z'Y qw xY w)V    N)FileSelector
FileSystemLocalFileSystemPyFileSystemSubTreeFileSystemFSSpecHandler)util)guid)_read_table_test_dataframe_test_table_write_tablec                    t        j                  dg di      }| dz  }|j                          |dz  }t        j                  |t        |             t        j                  |t                     }|j                  |      sJ t        j                  dt        j                  |             }|j                  |      sJ y )Na         data_dirdata.parquet
filesystemzdata_dir/data.parquet)patablemkdirpqwrite_tablestr
read_tabler   equalsr	   _filesystem_uri)tempdirr   	directorypathresults        a/var/www/html/land_sniper/venv/lib/python3.12/site-packages/pyarrow/tests/parquet/test_dataset.pytest_filesystem_urir'   ;   s    HHc9%&E*$IOO~%DNN5#d)$ ]]*,F== ]]D,@,@,IKF==    c                 0    t               }t        ||        y N)r   _partition_test_for_filesystem)r"   locals     r&   test_read_partitioned_directoryr-   N   s    E"5'2r(   c                     t               }| }t        ||       t        j                  |      }|j	                  dg      }|j
                  dgk(  sJ y )Nvaluescolumns)r   r+   r   ParquetDatasetreadcolumn_names)r"   r,   	base_pathdatasetr%   s        r&   'test_read_partitioned_columns_selectionr7   T   sS     EI"5)4	*G\\8*\-F8*,,,r(   c                 h   t               }| }ddg}g d}ddg}d|gd|gd|gg}t        j                  t        j                  |d	
      j                  d      t        j                  t        j                  t        j                  |t        
      d      d      t        j                  t        j                  t        j                  |d
      d      d      t        j                  d      d      }t        ||||       t        j                  ||g d      }|j                         }	|	j                         j                  d      }
d|
d   j                  vsJ d|
d   j                  vsJ d|
d   j                  vsJ g dddgg}t        j                  |||      }|j                         }	|	j                         j                  d      }
|
d   dk(  |
d   dk7  z  |
d   dk(  z  }t        j                  |
d         dk(  |
d   dk(  z  }|j!                         dkD  sJ |j!                         dkD  sJ |
j"                  d   |j!                         |j!                         z   k(  sJ dggdggfD ]9  }t        j                  |||      }|j                         j$                  dk(  r9J  y )Nr   r   r   bcTFintegerstringbooleani4dtype      r   boolr      r<   r=   r>   r/   ))r<   =r   )r=   !=r:   )r>   ==Truer   filtersdropr:   )r<   rG   r   )r>   rI   FalserJ   rO   )r=   rI   s   1 a)r=   rI   z1 a)r   pd	DataFramenparrayrepeattileobjectarange_generate_partition_directoriesr   r2   r3   	to_pandasreset_indexr/   sumshapenum_rows)r"   r,   r5   integer_keysstring_keysboolean_keyspartition_specdfr6   r   	result_dfrL   df_filter_1df_filter_2s                 r&   test_filters_equivalencyrf   a   s   EIq6L!K%=L	L!	;	L!N 
88L5<<R@''"''"((;f"EqI1M77277288L#GKQO))B-	 
B $E9nbI e,G
 LLNE"..D.9IIi(/////i)00000	),33333	

 
89G eW6GLLNE!--4-8I Y'1,X#%'Y6)+K 88Ii01Q6Y7*,K??q   ??q   ??1+//"3koo6G"GHHH/01./02 ,##%:||~&&!+++	,r(   c                    t               }| }g d}d|gg}d}t        j                  t        j                  |      t        j
                  |d      dddg      }t        ||||       t        j                  ||d	d
g      }|j                         }|j                         j                  d      j                  d      }	t        t        |	d   j                        D 
cg c]  }
|
 }}
|ddgk(  sJ y c c}
w )Nr   r   r   r      integersrC   r?   r@   indexrj   rl   r0   )rj   <ri   )rj   >r   rK   byTrM   r   r   r   rP   rQ   rR   rW   rS   rX   r   r2   r3   rY   sort_valuesrZ   mapintr/   r"   r,   r5   r^   ra   Nrb   r6   r   rc   xresult_lists               r&   %test_filters_cutoff_exclusive_integerry      s   EI"L	\"N 	
A	1HH\6 $
&B
 $E9nbIe  
G LLNE""{g{."{{-  "#y'<'C'CDE1EKE1a&    Fs   #	C8z5Loss of type information in creation of categoricals.)raisesreasonc           	      8   t               }| }t        j                  ddd      t        j                  ddd      t        j                  ddd      t        j                  ddd      t        j                  ddd      g}d|gg}d	}t        j                  t        j                  |      t        j                  |d
      dddg      }t        ||||       t        j                  ||ddg      }|j                         }|j                         j                  d      j                  d      }	t        j                  t        j                  t        j                  ddd      gd
      t        j                  |d
            }
|	d   j                   |
k(  sJ y )Ni  ri   	   
            datesrC   
datetime64r@   )rl   r   rl   r0   )r   rm   z
2018-04-12)r   rn   z
2018-04-10rK   ro   TrM   
categories)r   datetimedaterP   rQ   rR   rW   rS   rX   r   r2   r3   rY   rr   rZ   Categoricalr/   )r"   r,   r5   	date_keysra   rv   rb   r6   r   rc   expecteds              r&   &test_filters_cutoff_exclusive_datetimer      ss    EI 	dAq!dAr"dAr"dAr"dAr"I 
)N 	
A	1)<8 !
#B
 $E9nbIe((
G LLNE""{g{."{{-  ~~
(--a,-\B88I\:<H W$$000r(   c           
      J   | dz  }t        j                  t        j                  ddd      t        d      d      j	                  |d       t        j                  |d	d
t        j                  ddd      fg      }|j                  d      j                         g dk(  sJ y )Nztimestamps.parquetz
2020-01-01r~   D)periodsfreq)r   idT)use_deprecated_int96_timestampsr   <=i  r   rC   rL   r   rh   )
rP   rQ   
date_rangerange
to_parquetr   r   r   column	to_pylist)r"   r$   r   s      r&   test_filters_inclusive_datetimer      s     ))DLL|RcBBi  z$z=MM$	$))$156) E <<'')_<<<r(   c                    t               }| }g d}d|gg}d}t        j                  t        j                  |      t        j
                  |d      dddg      }t        ||||       t        j                  ||d	d
g      }|j                         }|j                         j                  d      j                  d      }	t        t        |	d   j                        D 
cg c]  }
t        |
       }}
|ddgk(  sJ y c c}
w )Nrh   rj   rC   r?   r@   rk   rl   r0   )rj   r   r   )rj   z>=r   rK   ro   TrM   r   r   rq   ru   s               r&   test_filters_inclusive_integerr     s   EI"L	\"N 	
A	1HH\6 $
&B
 $E9nbIe!!
G LLNE"++)+4+(  $'sIj,A,H,H#IJa3q6JKJ1a&    Ks   #Dc                     t               }| }ddg}g d}ddg}d|gd|gd|gg}t        j                  t        j                  |d	
      j                  d      t        j                  t        j                  t        j                  |t        
      d      d      t        j                  t        j                  t        j                  |d
      d      d      t        j                  d      d      }t        ||||       t        j                  ||dg      }|j                         }	|	j                         j                  d      }
d|
d   j                  v sJ d|
d   j                  v sJ d|
d   j                  vsJ t        j                  ||dddgfddddhfg      }|j                         }	|	j                         j                  d      }
d|
d   j                  vsJ d|
d   j                  vsJ d|
d   j                  vsJ y )Nr   r   r9   TFr<   r=   r>   r?   r@   rB   rC   r   rD   r   rE   rF   )r=   inabrK   rM   r   r:   r;   r   )r=   r   r   r:   znot inrO   )r   rP   rQ   rR   rS   rT   rU   rV   rW   rX   r   r2   r3   rY   rZ   r/   )r"   r,   r5   r^   r_   r`   ra   rb   r6   r   rc   s              r&   test_filters_inclusive_setr   .  s
   EIq6L!K%=L	L!	;	L!N 
88L5<<R@''"''"((;f"EqI1M77277288L#GKQO))B-	 
B $E9nbIe'(G LLNE"..D.9I)H%,,,,,)H%,,,,,i)00000eTA3')EXy13G
 LLNE"..D.9IIi(/////i)00000	),33333r(   c                 N   t               }| }g d}d|gg}d}t        j                  t        j                  |      t        j
                  |d      dddg      }t        ||||       t        j                  t              5  t        j                  ||d	g
       d d d        t        j                  t              5  t        j                  ||dg
       d d d        t        j                  ||ddt               fg
      }|j                         j                  dk(  sJ t        j                  ||dddhfg
      }t        j                  t               5  |j                         j                  dk(  sJ 	 d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)Nrh   rj   rC   r?   r@   rk   rl   r0   )rj   r   r   rK   )rj   z=<r   r   r   rH   r   )r   rP   rQ   rR   rW   rS   rX   pytestrz   	TypeErrorr   r2   
ValueErrorsetr3   r]   NotImplementedError)r"   r,   r5   r^   ra   rv   rb   r6   s           r&   test_filters_invalid_pred_opr   ]  s   EI"L	\"N 	
A	1HH\6 $
&B
 $E9nbI	y	! =
)%*#8";	==
 
z	" =
)%*#8";	== 	+0*4dCE)B(EGG <<>""a'''	+0*4dQC)@(ACG 
*	+ ,||~&&!+++, ,'= =
= =, ,s$   F>F FFFF$c                    t               }| }g d}d|gg}d}t        j                  t        j                  |      t        j
                  |d      dddg      }t        ||||       d	}t        j                  t        |
      5  t        j                  ||dg      j                          d d d        y # 1 sw Y   y xY w)Nrh   rj   rC   r?   r@   rk   rl   r0   z1No match for FieldRef.Name\(non_existent_column\)match)non_existent_columnrm   r   rK   )r   rP   rQ   rR   rW   rS   rX   r   rz   r   r   r2   r3   )r"   r,   r5   r^   ra   rv   rb   msgs           r&   test_filters_invalid_columnr     s     EI"L!<01N	A	1HH\6 $
&B
 $E9nbI
>C	z	- N
)#B"E	GGKtvN N Ns   (B88CrL   )rj   rm   r   rj   r   nestedr   r:   read_method)r   read_pandasc                    t        t        |      }t               }| }g d}d|gg}t        |      }t	        j
                  t        j                  |      t        j                  |d      t        j                  t        |      D 	cg c]  }	|	t        |	      d c}	      d      }
t        ||||
       t        ||      } ||fi |}|j                  dk(  sJ y c c}	w )	Nrh   rj   r?   r@   r   )rl   rj   r   rK   r   )getattrr   r   lenrP   rQ   rR   rW   rS   r   r   rX   dictr]   )r"   rL   r   r3   r,   r5   r^   ra   rv   irb   kwargsr   s                r&   test_filters_read_tabler     s     2{#DEI"L	\"N 	LA	1HH\6((58Da!#a&1DE 
B $E9nbIUG4F%f%E>>Q Es   Cc                 h   t               }| }ddg}d|gg}d}t        j                  t        j                  |      t        j
                  |d      dddg	      }t        ||||       t        j                  |      }|j                         }|j                  d      j                         |k(  sJ y )
N2019_22019_3	year_weekr   rV   r@   )rl   r   rl   r0   )r   rP   rQ   rR   rW   rS   rX   r   r2   r3   r   r   )	r"   r,   r5   r_   ra   rv   rb   r6   r%   s	            r&   $test_partition_keys_with_underscoresr     s     EIX&K	k"N 	
A	1XXk: %
'B
 $E9nbI	*G\\^F==%//1[@@@r(   c                     | \  }}|dz   }t        j                  dg di      }t        |||       t        ||      }|j	                  |      sJ y Nz/test.parquetr   r   r   r   r   r   r   r    )s3_example_s3fsfsr$   r   r%   s        r&   test_read_s3fsr     sR    HB/!DHHc9%&E,"-F==r(   c                     | \  }}|dz   }t        j                  dg di      }t        |||       t        ||      }|j	                  |      sJ y r   r   )r   r   r#   r$   r   r%   s         r&   test_read_directory_s3fsr     sR    #MB	&DHHc9%&E,r2F==r(   c                     t        | dz        }t        j                  dg di      }t        ||       t	        j
                  |g      j                         }|j                  |      sJ y )Nr   r   r   )r   r   r   r   r   r2   r3   r    )r"   	data_pathr   r%   s       r&   test_read_single_file_listr     s[    Gn,-IHHc9%&E	"	{+002F==r(   c                 &    | \  }}t        ||       y r*   )r+   r   r   r$   s      r&   $test_read_partitioned_directory_s3fsr     s     HB"2t,r(   c                    ddg}g d}d|gd|gg}d}t        j                  t        j                  |      t        j                  |d      j                  d	      t        j                  t        j                  t        j                  |t              d
      d      t        j                  j                  |      dg d      }t        | |||       t        j                  ||       }|j                         }|j                         j                  d      j!                  d      }	|j                  d      j!                  d      j#                  |	j$                        }
|
d   j'                  d      |
d<   |
d   j'                  d      |
d<   |	j$                  g dk(  j)                         sJ t+        j,                  |	|
       y )Nr   r   r9   foobarrE   r?   r@   rB   rC   r   )rl   r   r   r/   r0   r   rl   ro   TrM   category)rl   r/   r   r   )rP   rQ   rR   rW   rS   rT   rU   rV   randomrandnrX   r   r2   r3   rY   rr   rZ   reindexr1   astypealltmassert_frame_equal)r   r5   foo_keysbar_keysra   rv   rb   r6   r   rc   expected_dfs              r&   r+   r+      s   1vHH		N 	A	1xx-44R8wwrwwrxx?CQG))//!$	
 1
2B $B	>2F	b9GLLNE"++)+4+(  >>W>-KTK*GI$5$5G6  %U+22:>K$U+22:>K!BBGGIII)[1r(   c           	           t         t              st        t                      t	              t         dt         dd             fd |dg        y )Npathsepsep/c                 Z   |   \  }}|D ]v  }|||fgz   }j                  t        |       | d| g      }j                  |       |dz
  k(  rddlm} j                  |t               g      }	t        |      }
t        j                  j                  |
      }j                  |	      5 }t        ||       d d d        j                  |	      j                  |j                  k7  sJ j                  |	      j                  |j                  k(  sJ j                  |dg      }j                  |      5 }	 d d d        < ||dz   |       j                  |dg      }j                  |      5 }	 d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w)NrG   r   r   )FileType_SUCCESS)joinr   
create_dir
pyarrow.fsr   r
   _filter_partitionr   Tablefrom_pandasopen_output_streamr   get_file_infotypeNotFoundFile)base_dirlevel	part_keysnamer/   valuethis_part_keys	level_dirr   	file_pathfiltered_df
part_tableffile_successDEPTH_visit_levelrb   r   ra   r   s                 r&   r   z5_generate_partition_directories.<locals>._visit_level2  s   %e,f 	E&4-8NH&%!& I MM)$	!/#LL)TV)<=	/NCXX11+>
**95 0 Q/0''	2778;L;LLLL''	2778==HHH&||Y
,CD**<8 A  Y	>B&||Y
,CD**<8 A 7	 0 0 
 s$   /F<F9F F	F	 F*	r   )
isinstancer   r   r   r   r   )r   r   ra   rb   r   r   r   s   ` ``@@@r&   rX   rX   '  sX     b*%-+,Eb)WR%<=G @ 1b!r(   c                 D   t        j                  t        |       t              }g }|D ]`  \  }}|j	                  |       t        |t        j                  t        j                  f      rt        j                  |      }|| |   |k(  z  }b | |   j                  |d      S )Nr@   r   )axis)rR   onesr   rD   appendr   r   r   rP   	TimestamprN   )rb   r   	predicateto_dropr   r   s         r&   r   r   U  s    Bt,IG  'et ehmmX->->?@LL'ERX&&	' i=gA..r(   c                 6   | dz  }|j                          t        j                  j                  t	        j
                  dg di            }t        j                  ||dz         | dz  }|j                          t        j                  j                  t	        j
                  dg di            }t        j                  ||dz         t        j                  | dgg      }|j                  d      j                  t        j                  g dg            sJ y )	NzA=0Br   r   zA=1r9   )ArI   r   r   )r   r   r   r   rP   rQ   r   r   r   r   r    chunked_array)r"   dir1table1dir2table2r   s         r&   "test_filter_before_validate_schemar	  e  s     U?DJJLXX!!",,Y/?"@AFNN64.01U?DJJLXX!!",,_/E"FGFNN64.01 MM'^,<+=>E<<##B$4$4i[$ABBBr(   c                 r   d}d}| t               z  }|j                          g }g }t        |      D ]  }t        ||      }|d   j	                  t
        j                        |d<   || dz  }t        j                  j                  |      }	t        |	|       |j                  |	       |j                  |        |dz  j                          dd}
 |
|      }t        j                  |      }|j                  |      sJ d	d
d|j                  dz
  g}|D cg c]  }|j!                  |      j"                   }}t%        j&                  ||      }t        j                  j)                  |D cg c]  }|j+                  |       c}||j,                  j.                        }|j                  |      sJ t%        j&                  |d       t        |      j0                  d d d df   }| t                dz  }t        j                  j                  |      }t        ||       y c c}w c c}w )Nr~   rC   seeduint32.parquetz_SUCCESS.crcTc                 T    t        j                  | fi |}|j                  ||      S )N)r1   use_threads)r   r2   r3   )pathsr1   r  r   r6   s        r&   read_multiple_filesz5test_read_multiple_files.<locals>.read_multiple_files  s*    ##E4V4||G|EEr(   r   r      r   r0   )namesmetadata)r  ri   )NT)r
   r   r   r   r   rR   int64r   r   r   r   r   touchconcat_tablesr    num_columnsfieldr   r   r   from_arraysr   schemar  iloc)r"   nfilessizedirpath	test_datar  r   rb   r$   r   r  r%   r   to_read	col_namesout	bad_applebad_apple_pathts                      r&   test_read_multiple_filesr(  {  s   FDGMMOIE6] T* (|**28848A3h'$$R(UD!T ~$$&F !'F	*H==""" !Q**Q./G/67!a%%7I7
--
3Cxx##w$G!V]]1%5$G*3-3]]-C-C $ EH ::h MM't,  1-221bqb59I$&22N
Y'AN# 8$Gs   "H/3H4c                    d}d}| t               z  }|j                          g }g }g }t        |      D ]  }t        ||      }t	        j
                  ||z  |dz   |z        |_        d|j                  _        || dz  }	t        j                  j                  |      }
t        |
|	       |j                  |
       |j                  |       |j                  |	        t        j                  |      }ddg}|j                  |      j!                         }t#        j$                  |D cg c]  }||   	 c}      }t'        j(                  ||       |j                  t+        |            j!                         }|j,                  |j,                  k(  sJ t'        j(                  |j/                  |j0                        |       y c c}w )	NrC   r  r   rl   r  uint8stringsr0   )r
   r   r   r   rR   rW   rl   r   r   r   r   r   r   r   r2   r   rY   rP   concatr   r   r   r\   r   r1   )r"   r  r  r   r!  framesr  r   rb   r$   r   r6   r1   r%   rw   r   s                   r&   test_dataset_read_pandasr.    s   FDGMMOIFE6] T*99QXA~6A3h'$$R(UD!bT (G	"G   1;;=Fyyf5!G*56H&(+   W 6@@BF<<8>>)))&..1A1A.BHM 6s   ,G
c                     | t               z  }|j                          t        dd      }|dz  }t        ||d       t	        j
                  |d      }|j                         j                  |      sJ y )	Nr~   r   r  	0.parquet2.6versionT)
memory_map)r
   r   r   r   r   r2   r3   r    )r"   r   r   r$   r6   s        r&   test_dataset_memory_mapr5    sj     GMMO#E[ De,D"G<<>  '''r(   c                    | t               z  }|j                          t        dd      }|dz  }t        ||d       t	        j
                  t              5  t        j                  |d       d d d        d	D ]:  }t        j                  ||      }|j                         j                  |      r:J  y # 1 sw Y   IxY w)
Nr~   r   r  r0  r1  r2  i)buffer_size)   i   )r
   r   r   r   r   rz   r   r   r2   r3   r    )r"   r   r   r$   r7  r6   s         r&   #test_dataset_enable_buffered_streamr9    s    GMMO#E[ De,	z	" &
	&& # ,##.||~$$U+++,	& &s   B77C c                 P   | t               z  }|j                          t        dd      }|dz  }t        ||d       dD ]d  }t	        j
                  ||      }|j                         j                  |      sJ t	        j                  ||      }|j                  |      rdJ  y )	Nr~   r   r  r0  r1  r2  )TF)
pre_buffer)	r
   r   r   r   r   r2   r3   r    r   )r"   r   r   r$   r;  r6   actuals          r&   test_dataset_enable_pre_bufferr=    s    GMMO#E[ De,# $
##
,||~$$U+++w:>}}U###$r(   c                     g }g }t        |      D ]C  }t        ||      }| | dz  }|j                  t        ||             |j                  |       E |S )Nr  r  )r   r   r   r   )r5   r  
file_nrowsr!  r  r   r   r$   s           r&   _make_example_multifile_datasetr@    sc    IE6] JQ/aS>)eT23T Lr(   c                     |D cg c]  }t        |j                                }}t        |      t        | j                        k(  sJ y c c}w r*   )r   as_posixr   files)r6   r  r$   s      r&   _assert_dataset_pathsrD  (  s@    .34dS!4E4u:W]]++++ 5s    A
dir_prefix_.c                     | t               z  }|j                          t        |dd      }|| dz  j                          t        j                  |      }t        ||       y )Nr~   rC   r  r?  stagingr
   r   r@  r   r2   rD  r"   rE  r   r  r6   s        r&   test_ignore_private_directoriesrM  -  s^     GMMO+GB78:E *W%%,,.(G'5)r(   c                    | t               z  }|j                          t        |dd      }|dz  j                  d      5 }|j	                  d       d d d        |dz  j                  d      5 }|j	                  d       d d d        t        j                  |      }t        ||       y # 1 sw Y   YxY w# 1 sw Y   7xY w)Nr~   rC   rI  z	.DS_Storewbs	   gibberishz.privater
   r   r@  openwriter   r2   rD  r"   r   r  r   r6   s        r&   test_ignore_hidden_files_dotrT  >  s    GMMO+GB78:E K
	%	%d	+ q	 J
	$	$T	* a	 (G'5)      B*.B6*B36B?c                    | t               z  }|j                          t        |dd      }|dz  j                  d      5 }|j	                  d       d d d        |dz  j                  d      5 }|j	                  d       d d d        t        j                  |      }t        ||       y # 1 sw Y   YxY w# 1 sw Y   7xY w)Nr~   rC   rI  _committed_123rO  s   abcd_started_321rP  rS  s        r&   #test_ignore_hidden_files_underscorerY  Q  s    GMMO+GB78:E $
$	*	*4	0 A	 N
"	(	(	. !	 (G'5)  rU  c                     | | dz  t               z  }|j                  d       t        |dd      }t        j                  |      }t        ||       t        j                  |      }t        ||       y )NdataTparentsr~   rC   rI  rK  rL  s        r&   /test_ignore_no_private_directories_in_base_pathr^  d  ss    
 :,d++df4GMM$M+GB78:E &G'5) (G'5)r(   c           	         dgdz  dgdz  z   }t        j                  t        j                  t        t	        |                  t        j                  |      j                         gddg      }t        j                  |t        |       dg       | dz  }|j                          t        j                  |t        |      dg       t        j                  | d	g
      }|j                  |      sJ y )Nxxxr   yyyrl   _partr  partition_cols_private_duplicate_private)ignore_prefixes)r   r   rS   r   r   dictionary_encoder   write_to_datasetr   r   r   r    )r"   partr   private_duplicater3   s        r&   test_ignore_custom_prefixesrm  w  s    7Q;%1$DHH
s4y!"
((* w!E
 s7|WIF"66s#45(/y2 ==*/D ;;ur(   c                     | dz  }|j                          t        j                  |      }|j                         }|j                  dk(  sJ |j
                  dk(  sJ y )Nr6   r   )r   r   r2   r3   r]   r  )r"   	empty_dirr6   r%   s       r&   test_empty_directoryrp    sW    )#IOO	*G\\^F??a"""r(   c                    dd l }dd lm} dd lm}  |j
                  t        d      t        d      t        t        d            t        j                  gdz  t        j                  ddd      j                  d	      d
      }|j                  j                         }ddg}	t        j                  j!                  ||dd      }
 |j"                  |
| |	|       t$        j&                  j)                  t+        |       d      }|9|j-                  |d      5 } |j.                  |
j0                  |       d d d        n |j.                  |
j0                  |        |j2                  | |      }t5        |j0                  j6                        }|t5        |
j0                  j6                        k(  sJ |j9                         }|j;                         }|j                  j                         }|	|dt=        |	      z  d  k(  sJ ||   }|	D ]  }||   j                  d      ||<    |r@|j?                  d      j@                  jC                         }|d   j                  |      |d<    |jD                  ||       y # 1 sw Y   *xY w)Nr   
aaabbbbccc
eefeffgeeer~   
2017-01-01
2017-01-11datetime64[D]r@   datetime64[ns])group1group2numnanr   rx  ry  F)r  safepreserve_indexr   _common_metadatarO  r   r   )#pandaspandas.testingtestingpyarrow.parquetparquetrQ   listr   rR   r{  rW   r   r1   tolistr   r   r   rj  osr$   r   r   rQ  write_metadatar  r2   r   r  r3   rY   r   r  r   to_pandas_dtyper   )r5   r   r  
index_namerP   r   r   	output_dfcolspartition_byoutput_tablemetadata_pathr   r6   dataset_colsinput_tableinput_dfinput_df_colscolexpected_date_types                       r&   &_test_write_to_dataset_with_partitionsr    s]      |$|$E"Ix"}		,OLSS I ##%Dh'L88''	&u7< ( >LBi#-/ GGLLY1CDM__]D1 	6QBl1115	6 	6 	,--}=b	+57G w~~++,L3|22889999,,.K$$&H $$++-M=c,.?)?)@AAAA~H ;"3..z:	#; #\\&166FFH%f-445GH	&B)X.=	6 	6s   I<<Jc           
         dd l }dd lm}  |j                  t	        d      t	        d      t	        t        d            t        j                  ddd      j                  d	      d
      }|j                  j                         }t        j                  j                  |      }|t               }n$t        |t               st#        t%        |            }d}t        |      D ]  } |j&                  || |        t)        t+        |       dd      }	|j-                  |	      }
|
D cg c]   }|j.                  j1                  d      s|" }}t3        |      |k(  sJ  |j4                  | |      j7                         }|j9                         }|j;                         }||   }t=        j>                  ||       y c c}w )Nr   rr  rs  r~   rt  ru  rv  r@   rw  )rx  ry  rz  r   rC   r   FT)allow_not_found	recursiver  ) r  r  r  rQ   r  r   rR   rW   r   r1   r  r   r   r   r   r   r   r   r   rj  r   r   r   r$   endswithr   r2   r3   rY   drop_duplicatesr   r   )r5   r   rP   r   r  r  r  nr   selectorinfosinfooutput_filesr  r  s                  r&   $_test_write_to_dataset_no_partitionsr    s     |$|$E"I		,OLSS	 I ##%D88''	2L$&

J/!-
";<
 	
A1X 3L)'1	33 C	NE&*,H $$X.E%*MTdii.@.@.LDMLM|!!! $"##j
df  $$&H'')H~H)X. Ns   ( F=	F=c                 ,    t        t        |              y r*   r  r   r"   s    r&   %test_write_to_dataset_with_partitionsr    s    *3w<8r(   c                    t        j                  t        j                  dt        j                               t        j                  dt        j                               t        j                  dt        j                               t        j                  dt        j
                               t        j                  dt        j                  d            g      }t        t        |       |	       y )
Nrx  )r   ry  rz  r{  r   us)unitr  )	r   r  r  r=   r  int32	timestampr  r   )r"   r  s     r&   0test_write_to_dataset_with_partitions_and_schemar    s    YY		<		<RXXZ8RXXZ8bll.EF	H IF
 +GV%r(   c                 0    t        t        |       d       y )Nr  )r  r  r  s    r&   4test_write_to_dataset_with_partitions_and_index_namer    s    *G/r(   c                 ,    t        t        |              y r*   )r  r   r  s    r&   #test_write_to_dataset_no_partitionsr    s    (W6r(   c                 <    t        | dz         t        | dz         y )Ntest1test2)r  r  r  s    r&   test_write_to_dataset_pathlibr    s    *7W+<=(7):;r(   c                    |\  }}t        j                  t        d      5  t        | dz  |       d d d        t        j                  t        d      5  t	        | dz  |       d d d        y # 1 sw Y   >xY w# 1 sw Y   y xY w)Nz"path-like objects are only allowedr   r  r   r  )r   rz   r   r  r  )r"   r   r   rF  s       r&   &test_write_to_dataset_pathlib_nonlocalr  #  s     EB	y(L	M ..g"	.. 
y(L	M .,g"	.. .	. .. .s   A/A;/A8;Bwin32z,test fails because of unsupported characters)r{   c                 (    | \  }}t        ||       y Nr   )r  r   s      r&   *test_write_to_dataset_with_partitions_s3fsr  2  s     HB*r(   c                 (    | \  }}t        ||       y r  )r  r   s      r&   (test_write_to_dataset_no_partitions_s3fsr  >  s     HB(r(   c                    t        j                  dg di      }t        j                  j	                  |      }t        |       }t        j                  ||t                      t        j                  |      }|j                  |      sJ y )Nr  r   r   )rP   rQ   r   r   r   r   r   rj  r   r   r    )r"   rb   r   r$   r%   s        r&    test_write_to_dataset_filesystemr  G  sg    	sI&	'BHH  $Ew<Dt0AB]]4 F==r(   c                    | dz  }t               }t        j                  t        j                  |      t        j
                  j                  |      dddg      }t        j                  j                  |      }d}t        j                  ||j                        5 }t        |      D ]  }|j                  |        	 d d d        t        j                  |      }	|	j                   j"                  |k(  sJ | dz  }
|j%                  t'        |
            5 }t        j(                  |j                  |       d d d        t        j*                  | |      }|S # 1 sw Y   xY w# 1 sw Y   .xY w)	Nr   )rl   r/   rl   r/   r0   r   	_metadatar   )r   rP   rQ   rR   rW   r   r   r   r   r   r   ParquetWriterr  r   r   ParquetFiler  num_row_groupsr   r   r  r2   )r"   rv   r$   r,   rb   r   
num_groupswriterr   readerr  r   r6   s                r&   _make_dataset_for_picklingr  R  sB   ^#DE	1))//!$ "
$B HH  $EJ			$	- &z" 	&Au%	&& ^^D!F??))Z777k)M		!	!#m"4	5 +
%,,*+ E#G N& &+ +s   "E!E%E"%E.c                 :    fd}t        |       } ||      sJ y )Nc                 J    | j                  j                  |             k(  S r*   )loadsdumps)objpickle_modules    r&   is_pickleablez*test_pickle_dataset.<locals>.is_pickleablep  s$    m))-*=*=c*BCCCr(   )r  )r"   r  r  r6   s    `  r&   test_pickle_datasetr  n  s#    D )1G!!!r(   c                 H   | dz  }t        j                  g dg dg dd      }t        j                  j	                  |      }t        j                  |t        |      ddg       t        j                  |      j                         }t        j                  ||d	z         y )
Nz
ARROW-3208)r  r~   g      @d     r   g333333=@)r  r~   r   r  r  r   r   )r   r   r   r   r   r   r   )onetwothreer  r  )	root_pathre  zoutput.parquet)rP   rQ   r   r   r   r   rj  r   r2   r3   r   )r"   r$   rb   r   s       r&   test_partitioned_datasetr  w  s     \!D	0,& 
B
 HH  $ET(-u~7d#((*ENN5$!112r(   c                    | dz  }t        j                  t        d      D cg c]  }t        j                  d       c}dz  gdg      }t        j                  t        d      D cg c]  }t        j                  d       c}dz  gdg      }t        j                  |t        |             t        j                  |t        |             t        j                  |dg      j                         }|d   j                  d      j                         |d   j                  d      j                         g}|d   j                  d	k(  sJ |d   j                  d      |d   j                  d
      }}|j                  |d         r|j                  |d
         sJ y |j                  |d
         sJ |j                  |d         sJ y c c}w c c}w )NzARROW-3325-datasetrC   r~   f0rc  )r  )read_dictionaryr   r   r   )r   r   r   r	   randsr   rj  r   r2   r3   chunkri  
num_chunksr    )	r"   r$   r   t1t2r%   	ex_chunksc0c1s	            r&   test_dataset_read_dictionaryr    s   ))D	E!H5qDJJrN5:;D6	JB	E!H5qDJJrN5:;D6	JBc$i0c$i0tf&&*df  AQ113AQ1135I !91$$$AY__Q!3B	yy1yy1&&&yy1&&&yy1&&&% 65s   G(Gc                    t        j                  dt        j                  g dt        j                               i      }t	        j
                  || dz         t	        j
                  || dz         t        j                  dg      }t	        j                  | dz  |      }t        j                  dg di|      }|j                  |      sJ t	        j                  | |      }t        j                  dg di|      }|j                  |      sJ t	        j                  | |      }t        j                  dg di|      }|j                         j                  |      sJ y )Nr   r   zdata1.parquetzdata2.parquet)r   r  r  )r   r   r   r   r   r   )r   r   rS   r  r   r   r  r   r    r2   r3   )r"   r   r  r%   r   s        r&   test_read_table_schemar    s   HHc288Irxxz:;<ENN5'O34NN5'O34YY'(F ]]7_4VDFxxi(8H==""" ]]762Fxx01&AH=="""wv6Fxx01&AH;;=)))r(   c                    t        j                  t        j                  g dt        j                               t        j                  g dt        j                               d      }t        j                  || dz         t        j                  | dz  ddg      }t        j                  ddg      }|j                  ddgk(  sJ |j                  |k(  sJ y )Nr   r   r   r   r0   )r   r  )
r   r   rS   r  r*  r   r   r   r  r4   )r"   r   r%   expected_schemas       r&   *test_read_table_duplicate_column_selectionr    s    HH288Irxxz:88Irxxz:< =ENN5'N23]]7^3c3ZHFii @AO3*,,,==O+++r(   c                    dd l m} | dz  }|dz  dz  dz  j                  d       t        j                  dg d	i      }t        j                  |t        |dz  dz  dz  d
z               |j                  g d      }t        j                  t        |      |      }|j                  g dk(  sJ t        j                  t        |      |      j                         }|j                  g dk(  sJ y )Nr   test_partitioning20121001Tr\  r   r   r   )yearmonthday)field_names)partitioning)r   r  r  r  )pyarrow.datasetr6   r   r   r   r   r   r   r  r   r4   r2   r3   )r"   dsr  r   rk  r%   s         r&   test_dataset_partitioningr    s      --I$%,,T,:HHc9%&ENNs9v%,t3nDEG ??'??@D]]IT+F"????IT++/46 "????r(   c                    t        j                  dg di      }t        j                  || dz         t	        t        |       t                     }t        j                  d|      }|j                         }|j                  |      sJ y )Nr   r   r   rG  r   )
r   r   r   r   r   r   r   r2   r3   r    )r"   r   r   r6   r%   s        r&   #test_parquet_dataset_new_filesystemr    sj    HHc9%&ENN5'N23"3w<1BCJ
;G\\^F==r(   c                 ^   t        j                  d      }|j                  d      }t        j                  dg di      }t        j                  || dz         t        |       j                  dd      }t        j                  ||      }|d	z   }|j                  d
   j                  |k(  sJ y )Nfsspecfiler   r   r   \r   r   z/data.parquetr   )r   importorskipr   r   r   r   r   r   replacer2   	fragmentsr$   )r"   r  r   r   r$   r6   r   s          r&   6test_parquet_dataset_partitions_piece_path_with_fsspecr    s       *F""6*JHHc9%&ENN5'N23 w<c*D%G o%HQ$$000r(   c                    t        j                  dg di      }| dz  }g fd}d}t        j                  ||dg||       |dz  dz  |d	z  dz  |d
z  dz  h}t	        t        t        j                              }||k(  sJ y )Nr   r   r  c                 <    j                  | j                         y r*   )r   r$   )written_filepaths_writtens    r&   file_visitorzDtest_parquet_write_to_dataset_exposed_keywords.<locals>.file_visitor  s    \../r(   zpart-{i}.parquet)r  r  basename_template1zpart-0.parquet23)r   r   r   rj  r   rs   pathlibPath)r"   r   r$   r  r  expected_pathspaths_written_setr  s          @r&   .test_parquet_write_to_dataset_exposed_keywordsr    s    HHc9%&E^#DM0 +t3%%1*;=
 	s
%%s
%%s
%%N
 Cm<=...r(   write_dataset_kwarg))r   T)r   Fc                    ddl m} t        j                  dg di      }| dz  }t	        j
                  |j                        }|\  }}|t	        j
                  t        j                        j                  vsJ ||j                  v sJ t        j                  j                  |dd      5 }t        j                  ||fi ||i |j                  d   \  }	}
}||   |k(  sJ 	 ddd       y# 1 sw Y   yxY w)	zEVerify kwargs in pq.write_to_dataset are passed onto ds.write_datasetr   Nr   r   zout.parquetwrite_datasetT)autospec)r  r6   r   r   inspect	signaturer  r   rj  
parametersmockpatchrV   
mock_calls)r"   r  r  r   r$   r  keyargmock_write_dataset_name_argsr   s               r&   #test_write_to_dataset_kwargs_passedr    s     !HHc9%&E]"D!!""2"23I"HC g''(;(;<GGGGG)&&&&&			2		> "!
E46C:61<<Q?ufc{c!!!	" " "s   ,7C--C6c                 |   t        j                  t        j                  g dg d      g dd      }t        j                  |      }| dz  }t        j                  || dz  dg       |j                         D cg c]  }|j                         s|j                  ! }}t        |      d	k(  sJ d
|vsJ y c c}w )N)r   r:   r   r9   r   r   )catr  r6   r  rd  r   zcat=c)rP   rQ   r   r   r   r   rj  iterdiris_dirr   r   )r"   rb   r   r$   r   subdirss         r&   'test_write_to_dataset_category_observedr#  '  s    
 
~~o/J 
B HHRLEYDw"E7  $||~<!qvv<G<w<1'!!! =s   <B9B9)r~   rC   )NNNr*   )r  )nr   r  r  r	  sysnumpyrR   ImportErrorr   unittest.mockr  pyarrowr   pyarrow.computecomputepcr   r   r   r   r   r   r   pyarrow.testsr	   pyarrow.utilr
   r  r  r   pyarrow.tests.parquet.commonr   r   r   r   r  rP   r  r  r   markr6   
pytestmarkr'   r-   r7   rf   ry   xfailr   AssertionErrorr   r   r   r   r   r   parametrizer  castr  r   r   s3r   r   r   r   r+   rX   r   r	  r(  r.  r5  r9  r=  r@  rD  rM  rT  rY  r^  rm  rp  r  r  r  r  r  r  r  r  skipifplatformr  r  r  r  r  r  r  r  r  r  r  r  r  r  r#   r(   r&   <module>r9     sL  $   	  
    H H   A A kk!!6;;#6#67
 & 3 3
 	- 	- C, C,L ! !B  ~&B	   '1 '1T = =  ! !B +4 +4\ %, %,P N N. /0012""((:.2""((8S1A5""((8S166xrxxzBQF	HI (EF GI 4 A A.             -  -
$2N+"\/  C C* 5$ 5$N "N "NJ ( ( , ,$ $ $ 	,
 Sz2* 3 * * *$ * *$ Sz2* 3 *"*# 7;266::/| 59+/\ 9 9 % % / /
 7 7 < <
 
.  
. CLLG+I  KK  
       8 " " 3 3 '.*.
,@0 1"/0 . 1 "	"* " "](  	B   	B  NBs5   Z1 Z? 
[ 1Z<;Z<?[
	[
	[[