I am trying to upload these local cron log files (older than 7 days) to S3:
'/home/anti/Documents/s3_upload/folder1/cron/20210101/12/cron.log',
'/home/anti/Documents/s3_upload/folder1/cron/20210101/00/cron.log',
'/home/anti/Documents/s3_upload/folder2/folder2_2/cron/20210101/10/cron.log',
'/home/anti/Documents/s3_upload/folder2/folder2_1/cron/20210101/11/cron.log'
I am trying to use a regex to extract the date segment (20210101) from each
file path and check whether it is older than 7 days, as sketched below.
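In isolation, that check looks something like this (a minimal sketch; the sample path is one of those listed above, and the 7-day threshold is the one I want):

import re
from datetime import datetime

path = '/home/anti/Documents/s3_upload/folder1/cron/20210101/12/cron.log'
m = re.search(r'/(\d{8})/', path)      # the date directory is exactly eight digits
if m:
    file_date = datetime.strptime(m.group(1), '%Y%m%d')
    age_days = (datetime.now() - file_date).days
    print(age_days >= 7)               # True once the log is older than 7 days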
This is my code
#!/usr/bin/python3
import configparser
import os
import re
from datetime import datetime

import boto3
import pytz as tz
#import slack
root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))
base_path = os.path.dirname(os.path.realpath(__file__))
parser = configparser.ConfigParser()
parser.read(os.path.join(base_path, 'FileConfig.cfg'))
bucket = parser.get('file_config', 's3bucket')
#channel = parser.get('file_config', 'channel')
sg_tz = tz.timezone('Asia/Singapore')  # separate name so the pytz alias is not shadowed
curr_data_end_time = datetime.now(sg_tz)
epoch_timestamp = datetime.now(sg_tz).timestamp()
aws_access_key_id = parser.get('file_config', 'aws_key_id')
aws_secret_access_key = parser.get('file_config', 'aws_secret')
s3 = boto3.resource('s3', region_name='ap-southeast-1',
aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)
#token = parser.get('file_config', 'token')
#client = slack.WebClient(token=token)
fullpath = []
for root, dirs, filelist in os.walk(root_path):
    if 'cron' in root:
        for file in filelist:
            if 'cron' in file:
                fullpath.append(os.path.join(root, file))
fmt = '%Y-%m-%d %H:%M:%S'
print(fullpath)
upload_list = []
for file in fullpath:
    path = re.split(pattern=r'/', string=file)  # split on single '/', not r'////'
    for dt in path:
        print("path :", path)
        date_ = re.match(pattern=r'\d{8}', string=dt)  # r'\d{8}' matches 20210101; r'/d{8}' never does
        print("date : ", date_)
        if date_:  # re.match returns None on no match, so test the match object itself
            print("here")
            date_ts = datetime.strptime(dt, '%Y%m%d')
            curr_ts = datetime.strptime(
                curr_data_end_time.strftime("%Y-%m-%d %H:00:00"), fmt)
            td = curr_ts - date_ts
            td_days = int(round(td.total_seconds() / 60 / 60 / 24))
            if td_days >= 7:
                upload_list.append(file)
                key = os.path.relpath(file, root_path)  # S3 key mirrors the layout under root_path
                with open(file, 'rb') as data:  # open the single file, not the fullpath list
                    s3.Bucket(bucket).put_object(Key=key, Body=data)  # bucket is a name string, so wrap it in s3.Bucket()
                # delete
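For reference, the script expects a FileConfig.cfg next to it along these lines (the section and key names come from the parser.get calls above; the values here are placeholders):

[file_config]
s3bucket = my-example-bucket
aws_key_id = AKIA................
aws_secret = ........................................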
The regex was not working: in my original code the split pattern was r'////' (four literal slashes, which never occur in these paths, so every path came back as a single-element list) and the match pattern was r'/d{8}' (a literal slash plus the letter d, not the digit class \d), so re.match always returned None. On top of that, the guard if len(str(date_)) > 4: can never pass for a failed match, because str(None) is 'None', which has length exactly 4; that is why "here" never printed.
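A standalone demonstration of the escaping issue (using one of the paths listed above):

import re

segment = '20210101'
print(re.match(r'/d{8}', segment))   # None: '/d' is a literal '/' followed by 'd'
print(re.match(r'\d{8}', segment))   # <re.Match object; span=(0, 8), match='20210101'>

path = '/home/anti/Documents/s3_upload/folder1/cron/20210101/12/cron.log'
print(re.split(r'////', path))       # no match, so the whole path stays as one element
print(re.split(r'/', path))          # ['', 'home', 'anti', ..., '20210101', '12', 'cron.log']
m = re.search(r'/(\d{8})/', path)    # or skip the split entirely and search the full path
print(m.group(1))                    # 20210101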
Also, is there a better way to upload these directories to S3 than calling bucket.put_object(Key=fullpath[len(path)+1:], Body=data) for each file?
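One option (a sketch, not necessarily the best way): boto3's managed transfer, Bucket.upload_file, streams the file for you and handles multipart uploads and retries, so there is no need to open() it yourself. This reuses the s3 resource, bucket, root_path, and upload_list from the script above; since S3 has no real folders, a key that mirrors the local layout relative to root_path reproduces the directory structure:

for local_file in upload_list:
    # e.g. the key becomes 'folder1/cron/20210101/12/cron.log'
    key = os.path.relpath(local_file, root_path).replace(os.sep, '/')
    s3.Bucket(bucket).upload_file(local_file, key)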
This is the output I was getting with the original patterns (the path never splits and date_ is always None):
path : ['/home/anti/Documents/s3_upload/folder1/cron/20210101/12/cron.log']
date : None
path : ['/home/anti/Documents/s3_upload/folder1/cron/20210101/00/cron.log']
date : None
path : ['/home/anti/Documents/s3_upload/folder2/folder2_2/cron/20210101/10/cron.log']
date : None
path : ['/home/anti/Documents/s3_upload/folder2/folder2_1/cron/20210101/11/cron.log']
date : None
question from:
https://stackoverflow.com/questions/66058674/is-there-a-better-way-to-upload-multiple-local-directories-to-s3