
    /_i[                        d dl Z d dlZ	 d dlZd dlZd dlZd dlm	Z	m
Z
 d dlmZ d dlmZ 	 d dlmZ d dlmZmZmZ 	 d dlZd dlmZ d dlmZmZ ej:                  j                   Zej:                  j,                  d        Zej:                  j,                  d        Z ej:                  j,                  d	        Z!ej:                  j,                  d
        Z"ej:                  j,                  d        Z#ej:                  j,                  d        Z$ej:                  j,                  d        Z%ej:                  j,                  d        Z&ej:                  j,                  d        Z'ej:                  j,                  d        Z(ej:                  j,                  d        Z)ej:                  j,                  d        Z*ej:                  j,                  d        Z+ej:                  j,                  d        Z,ej:                  j,                  d        Z-ej:                  j,                  d        Z.ej:                  j,                  d        Z/ej:                  j,                  d        Z0ej:                  j,                  d        Z1ej:                  j,                  d        Z2ej:                  j,                  d        Z3ej:                  j,                  d        Z4ej:                  j,                  d        Z5ej:                  j,                  d        Z6ej:                  j,                  d        Z7ej:                  j,                  ej:                  jq                  d g d!      ej:                  jq                  d"d#d$g      d%                      Z9ej:                  j,                  d&        Z:ej:                  j,                  d'        Z;y# e$ r dZY w xY w# e$ r dZY w xY w# e$ r dxZZY w xY w)(    N)LocalFileSystemSubTreeFileSystem)guid)Version)_read_table_test_dataframe_write_table)_roundtrip_pandas_dataframealltypes_samplec                 j   t        d      }| dz  }t        j                  j                  |      }d|j                  j
                  v sJ t        ||       t        j                  |      j
                  }d|v sJ t        j                  |d   j                  d            }|d   dd ddd	d
gk(  sJ y )N'  sizepandas_roundtrip.parquet   pandasutf8index_columnsranger      )kindnamestartstopstep)r   paTablefrom_pandasschemametadatar	   pqread_metadatajsonloadsdecode)tempdirdffilenamearrow_tabler   jss         `/var/www/html/land_sniper/venv/lib/python3.12/site-packages/pyarrow/tests/parquet/test_pandas.py#test_pandas_parquet_custom_metadatar+   7   s    	e	$B33H((&&r*K**33333h')22H   	HY'..v6	7BoG,0-.,-$/ #0 0 0 0    c           	         t        j                  t        j                  dt        j                               t        j                  dt        j                               t        j                  dt        j
                               g      }t        j                  t        j                  dt        j                        t        j                  dt        j                        g dd      }t        j                  dd	gd
d gd d gd      }t         j                  j                  ||d      }t         j                  j                  ||d      }|j                  j                  |j                  d      rJ |j                  j                  |j                        sJ t        j                  | dz  |      }|j!                  |       |j!                  |       y )Nintfloatstring   dtype)ABBAEDDAACDC)r.   r/   r0         g?F)r   preserve_indexT)check_metadatazmerged.parquet)r   )r   r   fieldint16float32r0   pd	DataFramenparangeuint8r   r   equalsr    ParquetWriterwrite_table)r%   r   df1df2table1table2writers          r*   :test_merging_parquet_tables_with_different_pandas_metadatarK   K   sb    YY

#
"**,'
299;' F
 ,,yy"((+1BJJ/* C
 ,,1vt, C
 XX!!#fU!KFXX!!#fU!KF}}##FMM$#GGG==...g(88HF
v
vr,   c                    | dz  }t        d      }ddddd|_        t        j                  j	                  |      }d	|j
                  j                  d
   v sJ t        ||       t        j                  |      j                  }t        j                  |d
   j                  d            }d|v sJ |d   |j                  k(  sJ y )Nzmetadata_persistence.parquetr   r   zhalf-precisionzsingle precisionzdouble precisionz%Attributes Persistence Test DataFrame)float16r=   float64
desciptions
   attributesr   r   
attributes)r   attrsr   r   r   r   r   r	   r    r!   r"   r#   r$   )r%   r'   r&   tabler   r)   s         r*   $test_attributes_metadata_persistencerS   h   s     77H	e	$B#%%=	BH HH  $EELL11)<<<<!)22H	HY'..v6	7B2lrxx'''r,   c                    t        d      }t        j                  j                  t	        t        |j                  |j                  d d d               ddg      |_        | dz  }t        j                  j                  |      }|j                  j                  J t        ||       t        j                  |      }|j                         }t!        j"                  ||       y )N
   r   level_1level_2namesr   )r   r>   
MultiIndexfrom_tupleslistzipcolumnsr   r   r   r   pandas_metadatar	   r    read_pandas	to_pandastmassert_frame_equal)r%   r&   r'   r(   
table_readdf_reads         r*   %test_pandas_parquet_column_multiindexrg      s    	b	!B**SRZZ"-./)$ + BJ
 33H((&&r*K--999h')J""$G"g&r,   c                    t        d      }| dz  }t        j                  j                  |d      }|j                  j
                  }|d   rJ |d   sJ t        ||       t        j                  |      }|j                  j
                  }|d   rJ |j                  j                  }|j                  j                  |k(  sJ |j                         }t        j                  ||       y )Nr   r   r   Fr9   r   r_   )r   r   r   r   r   r`   r	   r    ra   r   rb   rc   rd   )r%   r&   r'   r(   r)   re   r!   rf   s           r*   <test_pandas_parquet_2_roundtrip_read_pandas_no_index_writtenrj      s    	e	$B33H((&&r%&@K				+	+B/""" i==h')J				*	*B/"""%%..M&&-777""$G"g&r,   c                  D   t        d      } t        j                  j                  |       }t        j                         }t        ||d       |j                         }t        j                  |      }t        |      j                         }t        j                  | |       y )Nr   2.6versionr   r   r   r   BufferOutputStreamr	   getvalueBufferReaderr   rb   rc   rd   r&   r(   imosbufreaderrf   s         r*   )test_pandas_parquet_native_file_roundtriprw      sv    		B((&&r*K  "DdE2
--/C__S!F&!++-G"g&r,   c                  j   t        d      } t        j                  j                  |       }t        j                         }t        ||d       |j                         }t        j                  |      }t        j                  |ddg      j                         }t        j                  | ddg   |       y )Nr   rl   rm   stringsrB   r_   )r   r   r   r   rp   r	   rq   rr   r    ra   rb   rc   rd   rs   s         r*   test_read_pandas_column_subsetr{      s    		B((&&r*K  "DdE2
--/C__S!FnnG,ik  "i12G<r,   c                  D   t        d      } t        j                  j                  |       }t        j                         }t        ||d       |j                         }t        j                  |      }t        |      j                         }t        j                  | |       y )Nr   rl   rm   ro   rs   s         r*   #test_pandas_parquet_empty_roundtripr}      sv    		B((&&r*K  "DdE2
--/C__S!F&!++-G"g&r,   c                      ddiddiddigdd} t        j                  |       }t        j                  j	                  |      }t        j
                         }t        ||       y )	N	page_typer   record_typenon_consecutive_homer   1001)agg_col	uid_first)data)r>   r?   r   r   r   rp   r	   )r   r&   r(   rt   s       r*   !test_pandas_can_write_nested_datar      si     !A#Q'

 D 
4	 B((&&r*K  "Dd#r,   c                    | dz  }d}t        j                  t        j                  |t        j                        t        j                  |t        j
                        t        j                  |t        j                        t        j                  j                  |      dkD  g dd      }t        j                  j                  |      }|j                  d      5 }t        ||d	       d d d        t        j                  |j!                               }t#        |      }|j%                         }t'        j(                  ||       y # 1 sw Y   ^xY w)
Nzpandas_pyfile_roundtrip.parquetr8   r2   r   )foobarNbazqux)int64r=   rN   boolry   wbrl   rm   )r>   r?   r@   rA   r   r=   rN   randomrandnr   r   r   openr	   ioBytesIO
read_bytesr   rb   rc   rd   )	r%   r'   r   r&   r(   fr   re   rf   s	            r*   $test_pandas_parquet_pyfile_roundtripr      s    ::HD	4rxx099T499T4		%)5 
B ((&&r*K	t	 4[!U34 ::h))+,DT"J""$G"g&4 4s   E

Ec                    d}t         j                  j                  d       t        j                  t        j
                  |t         j                        t        j
                  |t         j                        t        j
                  |t         j                        t        j
                  |t         j                        t        j
                  |t         j                        t        j
                  |t         j                        t        j
                  |t         j                        t        j
                  |t         j                        t        j
                  |t         j                        t        j
                  |t         j                        t         j                  j                  |      dkD  d      }| dz  }t         j"                  j%                  |      }dD ]B  }t'        ||d|       t)        |      }|j+                         }t-        j.                  ||       D dD ]B  }t'        ||d|	       t)        |      }|j+                         }t-        j.                  ||       D d
D ]q  }	|	dk7  r*t         j0                  j2                  j5                  |	      s2t'        ||d|	       t)        |      }|j+                         }t-        j.                  ||       s y )Nr   r   r2   )rB   uint16uint32uint64int8r<   int32r   r=   rN   r   r   )TFrl   )rn   use_dictionary)rn   write_statistics)NONESNAPPYGZIPLZ4ZSTDr   )rn   compression)r@   r   seedr>   r?   rA   rB   r   r   r   r<   r   r   r=   rN   r   r   r   r   r	   r   rb   rc   rd   libCodecis_available)
r%   r   r&   r'   r(   r   re   rf   r   r   s
             r*   )test_pandas_parquet_configuration_optionsr      s   DIINN1	4rxx0))D		2))D		2))D		2		$bhh/4rxx04rxx04rxx099T499T4		%) 
B 33H((&&r*K' +[(E$2	4 *
&&(
b'*+ * +[(E&6	8 *
&&(
b'*+ A +6!FFLL--k:[(E!,	. *
&&(
b'*+r,   c                      t        d      } t        j                  ddt        |       z  d      | _        d| j                  _        t        | ddi      }t        j                  ||        y )Nd   r   r   rU   r   flavorspark)	r   r@   rA   lenindexr   r
   rc   rd   )r&   results     r*   +test_spark_flavor_preserves_pandas_metadatar   )  sT    	c	"ByyBRL"-BHBHHM(h-@AF&"%r,   c                 &   t        j                  d      dt        j                  d      dit        j                  d      t        j                  d      t        j                  d      t        j                  d      id}t        | dz        }t        j                  |d      j	                  dd	
      }t
        j                  j                  |      }t        ||       t        |      }|j                         }t        j                  ||       y )Nz2017-06-30 01:31:00g*_c@z2017-06-30 01:32:00)closetimedata.parquetzdatetime64[us]r2   r   Fdrop)r>   	Timestampstrr?   	set_indexr   r   r   r	   r   rb   rc   rd   )r%   r   pathdfxtdfxr(   	result_dfs          r*    test_index_column_name_duplicater   3  s     LL./1CLL./1C

 LL./%2 LL./%2	
D w'(D
 ,,t#3
4
>
>vE
>
RC88$Dtd#K%%'I)S)r,   c                    d}t        t        |            }t        j                  j	                  g d|gddg      }t        j
                  d|i|      }t        j                  j                  |      }| dz  }t        ||       t        |      }|j                  |      sJ |j                         }t        j                  ||       y )	Nr1   )r   r   r   foobarsome_numbersrY   numbers)r   zdup_multi_index_levels.parquet)r]   r   r>   r[   from_arraysr?   r   r   r   r	   r   rC   rb   rc   rd   )	r%   num_rowsr   r   r&   rR   r'   result_tabler   s	            r*    test_multiindex_duplicate_valuesr   Q  s    H5?#GMM%%	(( & E
 
y'*%	8BHH  $E99H!x(L<<%%%&&(I)R(r,   c                     d}t        j                  t        j                  |      dd dd      }t	        | dz        }|j                         }t        j                  ||       y )N  carat        cut  color  clarity  depth  table  price     x     y     z
 0.23      Ideal      E      SI2   61.5   55.0    326  3.95  3.98  2.43
 0.21    Premium      E      SI1   59.8   61.0    326  3.89  3.84  2.31
 0.23       Good      E      VS1   56.9   65.0    327  4.05  4.07  2.31
 0.29    Premium      I      VS2   62.4   58.0    334  4.20  4.23  2.63
 0.31       Good      J      SI2   63.3   58.0    335  4.34  4.35  2.75
 0.24  Very Good      J     VVS2   62.8   57.0    336  3.94  3.96  2.48
 0.24  Very Good      I     VVS1   62.3   57.0    336  3.95  3.98  2.47
 0.26  Very Good      H      SI1   61.9   55.0    337  4.07  4.11  2.53
 0.22       Fair      E      VS2   65.1   61.0    337  3.87  3.78  2.49
 0.23  Very Good      H      VS1   59.4   61.0    338  4.00  4.05  2.39\s{2,}r   pythonsep	index_colheaderenginezv0.7.1.parquet)r>   read_csvr   r   r   rb   rc   rd   datadirexpected_stringexpectedrR   r   s        r*   &test_backwards_compatible_index_namingr   g  s[    KO {{2::o6I%)!HFH"223E__F&(+r,   c                     d}t        j                  t        j                  |      dg ddd      j	                         }t        | dz        }|j                         }t        j                  ||       y )Nr   r   cutcolorclarityr   r   r   zv0.7.1.all-named-index.parquet)	r>   r   r   r   
sort_indexr   rb   rc   rd   r   s        r*   1test_backwards_compatible_index_multi_level_namedr   |  sj    KO {{


?#- jl	  "BBCE__F&(+r,   c                 .   d}t        j                  t        j                  |      dg ddd      j	                         }|j
                  j                  g d      |_        t        | dz        }|j                         }t        j                  ||       y )	Nr   r   r   r   r   r   )r   Nr   zv0.7.1.some-named-index.parquet)r>   r   r   r   r   r   	set_namesr   rb   rc   rd   r   s        r*   6test_backwards_compatible_index_multi_level_some_namedr     s    KO {{


?#!< jl	 
 ^^--.FGHN"CCDE__F&(+r,   c           	      ^   t        d      t        t        j                        k  rt        j                  d       t        j
                  g dg dt        j                  ddd      d	      }t        j                  j                  g d	t        j                  ddd      gd
d g      |_	        | dz  }t        |      }|j                         }t        j                  ||       t        |dg      }|j                         }t        j                  ||dg   j                  d             y )Nz2.2.0zRegression in pandas 2.2.0r      r1   )g?g?g333333?z
2017-01-01r1   zEurope/Brussels)periodstzabcr   rY   z'v0.7.1.column-metadata-handling.parquetr   rz   Tr   )r   r>   __version__pytestskipr?   
date_ranger[   r   r   r   rb   rc   rd   reset_index)r   r   r   rR   r   s        r*   2test_backwards_compatible_column_metadata_handlingr     s   w72>>22 	01||lmmL!8IJ	LMH ]]..		|Q3D	E	Go / HN
 >>DE__F&(+seE__F&(C5/"="=4"="HIr,   c                  "   t        j                  ddgddggddg      } | d   j                  d      | d<   | j                  dg      } t        j
                  j                  |       }t	        j                         }t        j                  ||       t        j                  |j                               j                         }t        |j                  t         j                        sJ |j                  j!                  | j                        sJ y )	Nr   r   r   dc1c2rz   category)r>   r?   astyper   r   r   r   rp   r    rE   ra   rq   rb   
isinstancer   CategoricalIndexrC   )r&   rR   bosref_dfs       r*   )test_categorical_index_survives_roundtripr     s     
SzC:.t	EB$xz*BtH	tf	BHH  $E



!CNN5#^^CLLN+557FfllB$7$7888<<rxx(((r,   c                     t        j                  dt        j                  g dg dd      i      } t        j                  j                  |       }t        j                         }t        j                  ||       |j                         }t        j                  |      j                         }t        j                  ||        y )Nr   )r   r   r   r   )r   r   r   T)
categoriesordered)r>   r?   Categoricalr   r   r   rp   r    rE   rq   ra   rb   rc   rd   )r&   rR   r   contentsr   s        r*   )test_categorical_order_survives_roundtripr     s     
sBNN$H I 
JB HH  $E



!CNN5#||~H^^H%//1F&"%r,   c                     t        j                  d gdz  dgdz  d      } | j                  ddd      }t        j                  j                  |       }t        j                  j                  |      }t        j                         }t        j                  ||dd       t        j                  |j                               }|d   j                  |d         sJ |d	   j                  |d	         sJ y )
Nr   g      ?)colr.   r   rl   rU   )rn   
chunk_sizer   r   )r>   r?   r   r   r   r   rp   r    rE   
read_tablerq   rC   )r&   df_categoryrR   	table_catru   r   s         r*   *test_pandas_categorical_na_type_row_groupsr    s     
tfslC53;?	@B))JzBCKHH  $E$$[1I



!C NN9c5R@]]3<<>*F !9E!H%%%!9E!H%%%r,   c                  N   t        j                  g dd      } g d}t        j                  dt        j                  j                  | |      i      }t        j                         }t        j                  t        j                  |      |       t        j                  |j                               j                         }|j                  j                  dk(  sJ |j                  j                   j"                  |k(  j%                         sJ t'        j(                  ||       y )N)r   r   r   r   r   rV   r   r   r2   )r   r   r   x)r   r   )r@   arrayr>   r?   r   
from_codesr   rp   r    rE   rR   r  rq   rb   r	  r3   catr   allrc   rd   )codesr   r&   ru   r   s        r*   !test_pandas_categorical_roundtripr    s    
 HH+7;E&J	sBNN55* 6 & ' 
(B 


!CNN288B<%]]3<<>*446F88>>Z'''HHLL##z166888&"%r,   c                    t        t        j                        t        d      k  rt        j                  d       t        j
                  dg did      }|j                  d      }t        j
                  dg di      }|j                  d      }t        j                  |d         j                         t        j                  |d         j                         k(  sJ t        j                  |d   j                  j                  j                        j                         t        j                  |d   j                  j                  j                        j                         k(  sJ t        | dz        }t        j                  t        j                   |      |       t        j"                  |      j%                         }t'        j(                  ||       y )	Nz1.3.0z:PyArrow backed string data type introduced in pandas 1.3.0r	  )r   r   r   zstring[pyarrow]r2   r   zcat.parquet)r   r>   r   r   r   r?   r   r   r
  	to_pylistr  r   valuesr   r    rE   rR   r  rb   rc   rd   )r%   rF   rG   r   r   s        r*   )test_categories_with_string_pyarrow_dtyper    sZ    r~~!11PQ
,,23;L
MC
**Z
 C
,,23
4C
**Z
 C 88CH'')RXXc#h-?-I-I-KKKK88CHLL++223==?288C&&D((1	4 4 4 w&'DNN288C=$']]4 **,F&#&r,   c                 0   t        j                  dg dd      }|d   j                  d      |d<   t        j                  |      }t        j                  |t        | dz        dg       t        j                  t        | dz              j                         }t        j                  |dg   |dg          t        j                  |t        | d	z               t        j                  t        | d	z              j                         }t        j                  |dg   |dg          t        j                  |t        | d
z               t        j                  t        | d
z              j                         }t        j                  |dg   |dg          y )Nr   r   partr  r  Int64case1r  partition_colscase2r   )r>   r?   r   r   rR   r    write_to_datasetr   r  rb   rc   rd   rE   )r%   r&   rR   r   s       r*   5test_write_to_dataset_pandas_preserve_extensiondtypesr  $  sF   	s95	6B5	  )BuIHHRLEs7W$%vh ]]3w012<<>F&%/2ug;7s7W#456]]3w012<<>F&%/2ug;7NN5#g678]]3w789CCEF&%/2ug;7r,   c                 j   t        j                  g dg dd      }t        j                  g dd      |_        t	        j
                  |      }|ddg   j                         }|d   j                  d	      |d<   t        j                  |t        | d
z        dg       t        j                  t        | d
z              j                         }t        j                  ||       t        j                  |t        | dz               t        j                  t        | dz              j                         }t        j                  ||       t        j                  |t        | dz               t        j                  t        | dz              j                         }t        j                  ||       y )N)r   r   r   r   r  r   idxr   r  r  r   r  r  r  r   )r>   r?   Indexr   r   rR   copyr   r    r  r   r  rb   rc   rd   rE   )r%   r&   rR   df_catr   s        r*   +test_write_to_dataset_pandas_preserve_indexr$  9  sM    
yA	BBxxe4BHHHRLE %%'FF^**:6F6Ns7W$%vh ]]3w012<<>F&&)s7W#456]]3w012<<>F&"%NN5#g678]]3w789CCEF&"%r,   r9   )TFNmetadata_fname	_metadata_common_metadatac                    d}d}| t               z  }|j                          g }g }g }t        |      D ]  }	t        ||	      }
t	        j
                  t        j                  |	|z  |	dz   |z  d      d      |
_        ||	 dz  }t        j                  j                  |
|	      }|j                  d       }|j                  j                  J t        ||       |j!                  |       |j!                  |
       |j!                  |        t        j                  j                  
|	      }t#        j$                  |j                  ||z         t#        j&                  |      }d
dg}|j)                  |      j+                         }t	        j,                  |D cg c]  }||   	 c}      }|dur|
j                  j.                  nd |j                  _        t1        j2                  ||       y c c}w )Nr8   )r   r   r   r2   r   r   z.parquetri   rB   ry   rz   F)r   mkdirr   r   r>   r!  r@   rA   r   r   r   r   replace_schema_metadatar   r   r	   appendr    write_metadataParquetDatasetra   rb   concatr   rc   rd   )r%   r9   r%  nfilesr   dirpath	test_dataframespathsir&   r   rR   table_for_metadatadatasetr_   r   r	  r   s                      r*   (test_dataset_read_pandas_common_metadatar7  R  s    FDGMMOIFE6] T*88IIa$hQ$g>W
 A3h'$$R$G --d3||$$,,,UD!bT#( --
> .  (//>1IJ(G	"G   1;;=Fyyf5!G*56H'u4$ NN&(+ 6s   !G6c                    t        j                  dg di      }| dz  }t        ||       t        j                  dt        t        |       t                           }|j                  t        j                  |            sJ y )Nr   r   r   )
filesystem)r>   r?   r	   r    ra   r   r   r   rC   r   rR   )r%   r&   r'   r   s       r*   %test_read_pandas_passthrough_keywordsr:    si     
sI&	'B'HX^^$S\?3DEF =="&&&r,   c                 z   t        j                  t        j                  ddgddgg      t        j                  ddg      d      }| dz  }t        j                  t        j
                         t        j
                               }t        j                  t        j                  d	|      t        j                  d
t        j
                               g      }t        j                  j                  ||      }t        ||       t        j                  |      j                         }t        j                  ||       y )N)id	something)value2else)r<  
something2)valueelse2r   r   )col1col2r   rC  rD  )r>   r?   Seriesr   map_r0   r   r;   r   r   r	   r    ra   rb   rc   rd   )r%   r&   r'   udtr   r(   r   s          r*   test_read_pandas_map_fieldsrH    s     
		 "45!#56
  		5%.) 
B 'H
''"))+ryy{
+CYY-rxx		/LMNF((&&r62Kh'^^H%//1F&"%r,   )<r   r"   numpyr@   ImportErrorr   pyarrowr   
pyarrow.fsr   r   pyarrow.utilr   pyarrow.vendored.versionr   pyarrow.parquetparquetr    pyarrow.tests.parquet.commonr   r   r	   pandasr>   pandas.testingtestingrc   r
   r   mark
pytestmarkr+   rK   rS   rg   rj   rw   r{   r}   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r$  parametrizer7  r:  rH   r,   r*   <module>rY     s'  $ 
    9  , < <? [[  
 0 0&  8 ( (0 ' '$ ' '0 ' ' 
= 
= ' ' $ $  ' '. )+ )+X & & * *: ) )* , ,( , ,0 , ,2 J J0 ) ) & & & &" & &$ ' '. 8 8( & &0 )+>?)K9K+LM,, N @ ,,^ ' ' & &}  	B  	B  NBs3   N7 O O 7OOOO	O O 