Well, there's a way to make any type comparable: Simply wrap it in a class that compares like you need it:
class DataFrameWrapper():
def __init__(self, df):
self.df = df
def __eq__(self, other):
return self.df.equals(other.df)
So when you wrap your "uncomparable" values you can now simply use ==
:
>>> import pandas as pd
>>> df1 = pd.DataFrame({'a': [1,2,3]})
>>> df2 = pd.DataFrame({'a': [3,2,1]})
>>> a = {'x': 1, 'y': {'z': "George", 'w': DataFrameWrapper(df1)}}
>>> b = {'x': 1, 'y': {'z': "George", 'w': DataFrameWrapper(df1)}}
>>> c = {'x': 1, 'y': {'z': "George", 'w': DataFrameWrapper(df2)}}
>>> a == b
True
>>> a == c
False
Of course wrapping your values has it's disadvantages but if you only need to compare them that would be a very easy approach. All that may be needed is a recursive wrapping before doing the comparison and a recursive unwrapping afterwards:
def recursivewrap(dict_):
for key, value in dict_.items():
wrapper = wrappers.get(type(value), lambda x: x) # for other types don't wrap
dict_[key] = wrapper(value)
return dict_ # return dict_ so this function can be used for recursion
def recursiveunwrap(dict_):
for key, value in dict_.items():
unwrapper = unwrappers.get(type(value), lambda x: x)
dict_[key] = unwrapper(value)
return dict_
wrappers = {pd.DataFrame: DataFrameWrapper,
dict: recursivewrap}
unwrappers = {DataFrameWrapper: lambda x: x.df,
dict: recursiveunwrap}
Sample case:
>>> recursivewrap(a)
{'x': 1,
'y': {'w': <__main__.DataFrameWrapper at 0x2affddcc048>, 'z': 'George'}}
>>> recursiveunwrap(recursivewrap(a))
{'x': 1, 'y': {'w': a
0 1
1 2
2 3, 'z': 'George'}}
If you feel really adventurous you could use wrapper classes that depending on the comparison result modify some variable that holds the information what wasn't equal.
This part of the answer was based on the original question that didn't include nestings:
You can seperate the unhashable values from the hashable values and do a set-comparison for the hashable values and a "order-independant" list-comparison for the unhashables:
def split_hashable_unhashable(vals):
"""Seperate hashable values from unhashable ones and returns a set (hashables)
and list (unhashable ones)"""
set_ = set()
list_ = []
for val in vals:
try:
set_.add(val)
except TypeError: # unhashable
list_.append(val)
return set_, list_
def compare_lists_arbitary_order(l1, l2, cmp=pd.DataFrame.equals):
"""Compare two lists using a custom comparison function, the order of the
elements is ignored."""
# need to have equal lengths otherwise they can't be equal
if len(l1) != len(l2):
return False
remaining_indices = set(range(len(l2)))
for item in l1:
for cmpidx in remaining_indices:
if cmp(item, l2[cmpidx]):
remaining_indices.remove(cmpidx)
break
else:
# Run through the loop without finding a match
return False
return True
def dict_compare(d1, d2):
if set(d1) != set(d2): # compare the dictionary keys
return False
set1, list1 = split_hashable_unhashable(d1.values())
set2, list2 = split_hashable_unhashable(d2.values())
if set1 != set2: # set comparison is easy
return False
return compare_lists_arbitary_order(list1, list2)
It got a bit longer than expected. For your test-cases it definetly works:
>>> import pandas as pd
>>> df1 = pd.DataFrame({'a': [1,2,3]})
>>> df2 = pd.DataFrame({'a': [3,2,1]})
>>> a = {'x': 1, 'y': df1}
>>> b = {'y': 1, 'x': df1}
>>> c = {'y': 1, 'x': df2}
>>> dict_compare(a, b)
True
>>> dict_compare(a, c)
False
>>> dict_compare(b, c)
False
The set
-operations can also be used to find differences (see set.difference
). It's a bit more complicated with the list
s, but not really impossible. One could add the items where no match was found to a seperate list instead of instantly returning False
.