Replies by Arun Maskara for InterSystems Developer Community

Thanks for this post, I noticed that doing insert from SQLAlchemy is slower than when is done directly from Python. So I wrote following code, in one case I can getting connection string from SQLAlchemy and in second case getting it from Python. Exact code is 50% slower when connection is obtained from SQLAlchemy. Any thoughts?

The code is below, only difference between the two run is how I am setting the connection variable.

import pandas as pd

from sqlalchemy import create_engine

import numpy as np

import time

import iris

# Create the SQLAlchemy engine

DATABASE_URL = "iris+intersystems://XXX:XXX@localhost:1972/USER"

engine = create_engine(DATABASE_URL, echo=True)

args = {'hostname':'localhost', 'port':1972, 'namespace':'USER', 'username':'XXX', 'password':'XXX'}

# connection = iris.connect(**args)

connection = engine.raw_connection()

# Generate data for each row (49 fields)

num_records = 100000

# Define SQL insert statement

sql_insert = "INSERT INTO SQLUser . test_table ( field_0 , field_1 , field_2 , field_3 , field_4 , field_5 , field_6 , field_7 , field_8 , field_9 , field_10 , field_11 , field_12 , field_13 , field_14 , field_15 , field_16 , field_17 , field_18 , field_19 , field_20 , field_21 , field_22 , field_23 , field_24 , field_25 , field_26 , field_27 , field_28 , field_29 , field_30 , field_31 , field_32 , field_33 , field_34 , field_35 , field_36 , field_37 , field_38 , field_39 , field_40 , field_41 , field_42 , field_43 , field_44 , field_45 , field_46 , field_47 , field_48 ) VALUES ( ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? , ? ) "

record_values = []

# Execute SQL insert

try:

start_time = time.perf_counter() # Capture start time

batch = 0

cursor = connection.cursor()

for _ in range(num_records):

record_values = [np.random.rand() for _ in range(49)]

cursor.execute(sql_insert, record_values)

batch = batch + 1

if batch >= 10000:

connection.commit()

batch = 0

# print("Record inserted successfully!")

connection.commit()

end_time = time.perf_counter() # Capture end time

elapsed_time = end_time - start_time

print(f"Time taken: {elapsed_time} seconds to insert {num_records} at ", num_records/elapsed_time, " records per second.")

except Exception as e:

print("Error inserting record:", e)

finally:

cursor.close()

connection.close()

engine.dispose()