physicalattraction's answer is indeed significantly quicker. It's much faster than my solution, which was to just add a separate matrix with that single row set, although the addition solution was still faster than the slicing solution.
The takeaway for me is that the fastest way to set rows in a csr_matrix or columns in a csc_matrix is to modify the underlying data yourself.
def time_copy(A, num_tries=10000):
    """Return the seconds spent calling ``A.copy()`` ``num_tries`` times.

    The result is used as a baseline so that the cost of copying the
    matrix can be subtracted from a benchmark that copies it on every
    iteration.

    Args:
        A: Any object exposing a ``copy()`` method.
        num_tries: Number of copies to perform.

    Returns:
        Elapsed time in seconds as a float.
    """
    # perf_counter is monotonic and high-resolution, unlike time.time(),
    # which follows the wall clock and can jump during a measurement.
    start = time.perf_counter()
    for _ in range(num_tries):
        A.copy()
    return time.perf_counter() - start
def test_method(func, A, row_idx, new_row, num_tries=10000):
    """Benchmark ``func`` and print its duration net of the copy overhead.

    ``func`` is called ``num_tries`` times, each time on a fresh copy of
    ``A`` so that every call starts from the same input. The time needed
    for the copies alone (measured with ``time_copy``) is then subtracted
    from the total before printing.

    Args:
        func: Callable invoked as ``func(A_copy, row_idx, new_row)``.
        A: Object with a ``copy()`` method; passed (copied) to ``func``.
        row_idx: Row index forwarded to ``func``.
        new_row: Row values forwarded to ``func``.
        num_tries: Number of timed calls.
    """
    # perf_counter is monotonic and high-resolution, unlike time.time(),
    # which follows the wall clock and can jump during a measurement.
    start = time.perf_counter()
    for _ in range(num_tries):
        func(A.copy(), row_idx, new_row)
    elapsed = time.perf_counter() - start

    # Measured separately, after the benchmark loop, so it can be removed
    # from the reported figure.
    copy_time = time_copy(A, num_tries)
    print("Duration {}".format(elapsed - copy_time))
def set_row_csr_slice(A, row_idx, new_row):
    """Overwrite row ``row_idx`` of ``A`` in place using slice assignment.

    Args:
        A: Two-dimensional container supporting ``A[row, :] = values``.
        row_idx: Index of the row to overwrite.
        new_row: Values assigned to that row.
    """
    # (row_idx, slice(None)) is exactly the key produced by A[row_idx, :].
    whole_row = (row_idx, slice(None))
    A[whole_row] = new_row
def set_row_csr_addition(A, row_idx, new_row):
    """Add ``new_row`` to row ``row_idx`` of ``A`` via a one-row sparse matrix.

    A CSR matrix of the same shape as ``A`` is built whose only stored row
    is ``new_row`` (at ``row_idx``), and it is added to ``A``.

    NOTE: this *adds* to the row; it equals "setting" the row only when
    that row of ``A`` holds no non-zero entries beforehand.

    Args:
        A: Two-dimensional matrix (intended for ``csr_matrix``).
        row_idx: Index of the target row; negative values count from the
            last row.
        new_row: One value per column of ``A``.

    Returns:
        The sum. Callers should use this return value: when the matrix
        type does not implement in-place addition, ``+=`` only rebinds the
        local name and the object passed in is left unchanged.
    """
    n_rows, n_cols = A.shape
    if row_idx < 0:
        # Without this, the slice below would also overwrite indptr[0].
        row_idx += n_rows

    # indptr has one entry per *row* plus one, and must be integral.
    # Rows up to and including row_idx start at offset 0; every later
    # boundary sits at n_cols, so only row_idx owns the n_cols entries.
    indptr = np.zeros(n_rows + 1, dtype=np.intp)
    indptr[row_idx + 1:] = n_cols
    indices = np.arange(n_cols)

    A += csr_matrix((new_row, indices, indptr), shape=A.shape)
    return A
# Test matrix: 1000 ones placed at random (row, col) positions.
# No shape is passed, so it is inferred from the largest indices drawn.
>>> A = csr_matrix((np.ones(1000), (np.random.randint(0,1000,1000), np.random.randint(0, 1000, 1000))))
>>> test_method(set_row_csr_slice, A, 200, np.ones(A.shape[1]), num_tries = 10000)
Duration 4.938395977020264
>>> test_method(set_row_csr_addition, A, 200, np.ones(A.shape[1]), num_tries = 10000)
Duration 2.4161765575408936
# NOTE(review): set_row_csr is not defined in this snippet; presumably it is
# the function from physicalattraction's answer -- confirm.
>>> test_method(set_row_csr, A, 200, np.ones(A.shape[1]), num_tries = 10000)
Duration 0.8432261943817139
The slice solution also scales much worse with the size and sparsity of the matrix.
# Larger matrix, same average number of non-zeros per row
>>> A = csr_matrix((np.ones(10000), (np.random.randint(0,10000,10000), np.random.randint(0, 10000, 10000))))
>>> test_method(set_row_csr_slice, A, 200, np.ones(A.shape[1]), num_tries = 10000)
Duration 18.335174798965454
>>> test_method(set_row_csr, A, 200, np.ones(A.shape[1]), num_tries = 10000)
Duration 1.1089558601379395
# Super sparse matrix
>>> A = csr_matrix((np.ones(100), (np.random.randint(0,10000,100), np.random.randint(0, 10000, 100))))
>>> test_method(set_row_csr_slice, A, 200, np.ones(A.shape[1]), num_tries = 10000)
Duration 13.371600151062012
>>> test_method(set_row_csr, A, 200, np.ones(A.shape[1]), num_tries = 10000)
Duration 1.0454308986663818