I was able to overcome the issue by creating a bucketing column, month_a.
Below is the code:
-- CTAS: copy the raw yellow-taxi trip rows into an AVRO table at the given
-- S3 location, partitioned by pickup year/month and bucketed by month_a.
CREATE TABLE sampledb.yellow_trip_data_avro
WITH (
    format = 'AVRO',
    external_location = 's3://a4189e1npss3001/Athena/internal_tables/avro/',
    partitioned_by = ARRAY['year', 'month'],
    bucketed_by = ARRAY['month_a'],
    bucket_count = 12
) AS
SELECT
    VendorID,
    tpep_pickup_datetime,
    tpep_dropoff_datetime,
    passenger_count,
    trip_distance,
    RatecodeID,
    store_and_fwd_flag,
    PULocationID,
    DOLocationID,
    payment_type,
    fare_amount,
    extra,
    mta_tax,
    tip_amount,
    tolls_amount,
    improvement_surcharge,
    total_amount,
    -- month_a holds the same value as the month partition key below.
    -- NOTE(review): presumably duplicated because a partition column cannot
    -- also be used in bucketed_by -- confirm against the Athena CTAS docs.
    date_format(pickup_ts, '%c') AS month_a,
    -- Athena CTAS expects the partition columns last in the select list,
    -- in the same order as partitioned_by. Both are varchar ('%c' yields the
    -- month without a leading zero).
    date_format(pickup_ts, '%Y') AS year,
    date_format(pickup_ts, '%c') AS month
FROM (
    -- Parse the pickup timestamp string once so the format literal lives in
    -- a single place instead of being repeated per derived column.
    SELECT
        VendorID,
        tpep_pickup_datetime,
        tpep_dropoff_datetime,
        passenger_count,
        trip_distance,
        RatecodeID,
        store_and_fwd_flag,
        PULocationID,
        DOLocationID,
        payment_type,
        fare_amount,
        extra,
        mta_tax,
        tip_amount,
        tolls_amount,
        improvement_surcharge,
        total_amount,
        date_parse(tpep_pickup_datetime, '%Y-%c-%d %k:%i:%s') AS pickup_ts
    FROM sampledb.yellow_trip_data_raw
) AS trips;
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…