It seems the only way is to clean the data, so I imported the files into Python, cleaned out the '' (empty string) values, and then loaded the cleaned dataframe into PostgreSQL.
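A minimal sketch of the cleaning step, assuming the raw file is a CSV (the file name 'raw_data.csv' is a placeholder, not from the original post); replacing '' with None makes psycopg2 insert proper NULLs instead of empty strings:

import pandas as pd

# read the raw file; 'raw_data.csv' is a hypothetical path
df = pd.read_csv('raw_data.csv')
# map empty strings to None so they become NULL in PostgreSQL
df = df.replace({'': None})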
import sys
import psycopg2

param_dic = {
    "host"     : "localhost",
    "database" : "database",
    "user"     : "user",
    "password" : "password"
}
def connect(params_dic):
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params_dic)
    except (Exception, psycopg2.DatabaseError) as error:
        # bail out here instead of falling through and returning a dead connection
        print(error)
        sys.exit(1)
    print("Connection successful")
    return conn
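Opening the connection is then just (the credentials in param_dic above are placeholders for your own):

conn = connect(param_dic)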
def execute_many(conn, df, table):
    """
    Using cursor.executemany() to insert the dataframe
    """
    # Create a list of tuples from the dataframe values
    tuples = [tuple(x) for x in df.to_numpy()]
    # Comma-separated dataframe columns
    cols = ','.join(list(df.columns))
    # One %s placeholder per column, so this works for any column count
    placeholders = ','.join(['%s'] * len(df.columns))
    # SQL query to execute
    query = "INSERT INTO %s(%s) VALUES(%s)" % (table, cols, placeholders)
    cursor = conn.cursor()
    try:
        cursor.executemany(query, tuples)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        cursor.close()
        return 1
    print("execute_many() done")
    cursor.close()
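Putting it together, assuming the target table already exists with columns matching the dataframe ('my_table' is a placeholder name, since the table definition isn't shown):

execute_many(conn, df, 'my_table')
conn.close()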
More methods (with a bulk-insert performance benchmark) can be found here: https://naysan.ca/2020/05/09/pandas-to-postgresql-using-psycopg2-bulk-insert-performance-benchmark/
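For larger dataframes, one of the faster approaches benchmarked at that link is streaming the dataframe through an in-memory CSV buffer with cursor.copy_from(). A sketch, assuming the data contains no embedded commas (otherwise adjust the separator); null="" pairs with the cleaning step above, since to_csv writes None as an empty field:

from io import StringIO

def copy_from_stringio(conn, df, table):
    # Write the dataframe to an in-memory CSV buffer
    buffer = StringIO()
    df.to_csv(buffer, index=False, header=False)
    buffer.seek(0)
    cursor = conn.cursor()
    try:
        # COPY is much faster than row-by-row INSERTs
        cursor.copy_from(buffer, table, sep=",", null="")
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        cursor.close()
        return 1
    print("copy_from_stringio() done")
    cursor.close()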