1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
import datetime
from typing import Optional

import awswrangler as wr
import pandas as pd
def datetime_beijing(datetime_):
    """Return *datetime_* shifted forward by eight hours.

    The shift is a plain ``timedelta`` addition; no timezone information is
    attached to or read from the value.

    Args:
        datetime_: Any object supporting ``+ datetime.timedelta`` (the callers
            in this file pass ``datetime.datetime.now()``).

    Returns:
        The result of ``datetime_ + timedelta(hours=8)``.
    """
    # NOTE(review): the name suggests the input is expected to be UTC so that
    # the result is Beijing wall-clock time (UTC+8) -- confirm with callers.
    eight_hours = datetime.timedelta(hours=8)
    return datetime_ + eight_hours
def gen_part_parquet(
    df: pd.DataFrame,
    path: str,
    part: list,
    table: str,
    dtype: Optional[dict] = None,
    *,
    database: str = "xxx",
) -> None:
    """Write *df* to S3 as a partitioned Parquet dataset via awswrangler.

    Nothing is written when *df* has no rows; the frame is printed together
    with an "is empty" notice instead. Otherwise a start line and an end line
    (each prefixed with a timestamp) are printed around the
    ``wr.s3.to_parquet`` call.

    Args:
        df: Data to write; forwarded as ``df`` to ``wr.s3.to_parquet``.
        path: Destination, forwarded as ``path`` (an S3 prefix per the
            awswrangler documentation).
        part: Column names forwarded as ``partition_cols``.
        table: Catalog table name; also used in the progress messages.
        dtype: Optional column-type overrides forwarded as ``dtype``.
            Defaults to ``None``, matching ``gen_s3_parquet``.
        database: Catalog database name forwarded as ``database``. Defaults
            to the value that was previously hard-coded.
    """
    # ``len(df)`` rather than truthiness: a DataFrame raises on ``bool()``.
    if len(df) == 0:
        # Prints the (empty) frame itself, not the table name.
        print(df, " is empty....")
        return

    # NOTE(review): ``now()`` is the host's local time; the +8h shift only
    # yields Beijing time if the host clock runs in UTC -- confirm.
    print(
        datetime_beijing(datetime.datetime.now()),
        f"###### generate {table} parquet start...",
    )
    wr.s3.to_parquet(
        df=df,
        path=path,
        dataset=True,
        mode="overwrite_partitions",
        partition_cols=part,
        sanitize_columns=True,
        database=database,
        table=table,
        dtype=dtype,
    )
    print(
        datetime_beijing(datetime.datetime.now()),
        f"###### generate {table} parquet end...",
    )
def gen_s3_parquet(
    df: pd.DataFrame,
    path: str,
    table: str,
    dtype: Optional[dict] = None,
    *,
    database: str = "xxx",
) -> None:
    """Write *df* to S3 as an unpartitioned Parquet dataset via awswrangler.

    Nothing is written when *df* has no rows; the frame is printed together
    with an "is empty" notice instead. Otherwise a start line and an end line
    (each prefixed with a timestamp) are printed around the
    ``wr.s3.to_parquet`` call, which is invoked with ``mode="overwrite"``.

    Args:
        df: Data to write; forwarded as ``df`` to ``wr.s3.to_parquet``.
        path: Destination, forwarded as ``path`` (an S3 prefix per the
            awswrangler documentation).
        table: Catalog table name; also used in the progress messages.
        dtype: Optional column-type overrides forwarded as ``dtype``.
        database: Catalog database name forwarded as ``database``. Defaults
            to the value that was previously hard-coded.
    """
    # ``len(df)`` rather than truthiness: a DataFrame raises on ``bool()``.
    if len(df) == 0:
        # Prints the (empty) frame itself, not the table name.
        print(df, " is empty....")
        return

    # NOTE(review): ``now()`` is the host's local time; the +8h shift only
    # yields Beijing time if the host clock runs in UTC -- confirm.
    print(
        datetime_beijing(datetime.datetime.now()),
        f"###### generate {table} start...",
    )
    wr.s3.to_parquet(
        df=df,
        path=path,
        dataset=True,
        mode="overwrite",
        sanitize_columns=True,
        database=database,
        table=table,
        dtype=dtype,
    )
    print(
        datetime_beijing(datetime.datetime.now()),
        f"###### generate {table} end...",
    )
|