I tried to apply dask-ml's LabelEncoder to every column of my Dask DataFrame
that has the object dtype:
le = dm.LabelEncoder() #dm is dask-ml module
for column in df.columns:
if df[column].dtype == type(object):
df[column] =le.fit_transform(df[column].astype(str))
but it always throws this error:
AttributeError: 'bool' object has no attribute 'astype'
Here's the full error:
AttributeError Traceback (most recent call last)
<ipython-input-3-f9d73f91f2c1> in <module>
2 for column in df.columns:
3 if df[column].dtype == type(object):
----> 4 df[column] =le.fit_transform(df[column].astype(str))
5
6
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask_ml\preprocessing\label.py in fit_transform(self, y)
131 self, y: Union[ArrayLike, SeriesType]
132 ) -> Union[ArrayLike, SeriesType]:
--> 133 y = self._check_array(y)
134
135 if isinstance(y, da.Array):
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask_ml\preprocessing\label.py in _check_array(self, y)
109 y = y.cat.as_known()
110 else:
--> 111 y = y.to_dask_array(lengths=True)
112 return y
113
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\dataframe\core.py in to_dask_array(self, lengths, meta)
1429 """
1430 if lengths is True:
-> 1431 lengths = tuple(self.map_partitions(len, enforce_metadata=False).compute())
1432
1433 arr = self.values
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\base.py in compute(self, **kwargs)
277 dask.base.compute
278 """
--> 279 (result,) = compute(self, traverse=False, **kwargs)
280 return result
281
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\base.py in compute(*args, **kwargs)
559 postcomputes.append(x.__dask_postcompute__())
560
--> 561 results = schedule(dsk, keys, **kwargs)
562 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
563
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
82 get_id=_thread_get_id,
83 pack_exception=pack_exception,
---> 84 **kwargs
85 )
86
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
485 _execute_task(task, data) # Re-execute locally
486 else:
--> 487 raise_exception(exc, tb)
488 res, worker_id = loads(res_info)
489 state["cache"][key] = res
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\local.py in reraise(exc, tb)
315 if exc.__traceback__ is not tb:
316 raise exc.with_traceback(tb)
--> 317 raise exc
318
319
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
220 try:
221 task, data = loads(task_info)
--> 222 result = _execute_task(task, data)
223 id = get_id()
224 result = dumps((result, id))
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\core.py in <genexpr>(.0)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\core.py in <genexpr>(.0)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\core.py in <genexpr>(.0)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\optimization.py in __call__(self, *args)
961 if not len(args) == len(self.inkeys):
962 raise ValueError("Expected %d args, got %d" % (len(self.inkeys), len(args)))
--> 963 return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))
964
965 def __reduce__(self):
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\core.py in get(dsk, out, cache)
149 for key in toposort(dsk):
150 task = dsk[key]
--> 151 result = _execute_task(task, cache)
152 cache[key] = result
153 result = _execute_task(out, cache)
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\utils.py in apply(func, args, kwargs)
29 def apply(func, args, kwargs=None):
30 if kwargs:
---> 31 return func(*args, **kwargs)
32 else:
33 return func(*args)
c:\users\gp62\pycharmprojects\skripsi\venv\lib\site-packages\dask\array\chunk.py in astype(x, astype_dtype, **kwargs)
266
267 def astype(x, astype_dtype=None, **kwargs):
--> 268 return x.astype(astype_dtype, **kwargs)
269
270
AttributeError: 'bool' object has no attribute 'astype'
Any help is appreciated :)