The documentation says that the optional keyword arguments are passed to TextFileReader.
Well, technically, when you call pandas.io.parsers.read_csv, pandas.io.parsers.read_fwf or pandas.io.parsers.read_table, the keyword args and all your other args are passed to pandas.io.parsers._read, which in turn passes them to pandas.io.parsers.TextFileReader.
As we can see below, the __init__ of pandas.io.parsers.TextFileReader assigns some specific kwds to various instance variables and keeps anything not expected by the init method in an instance variable called self.orig_options.
class TextFileReader(abc.Iterator):
    """
    Passed dialect overrides any of the related parser options
    """

    def __init__(self, f, engine=None, **kwds):
        """
        Resolve the parser options and build the underlying engine.

        Parameters
        ----------
        f : object
            The source to parse; stored as ``self.f`` and checked against
            the engine via ``self._check_file_or_buffer``.
        engine : str, optional
            Name of the parser engine. When omitted, ``"python"`` is used
            and the engine is recorded as not explicitly specified.
        **kwds
            Parser options. After skipfooter validation, dialect merging
            and header inference they are kept as ``self.orig_options``.
        """
        self.f = f

        # Remember whether the caller chose the engine; an explicit
        # "engine_specified" keyword takes precedence over this inference.
        engine_specified = engine is not None
        if not engine_specified:
            engine = "python"
        self.engine = engine
        self._engine_specified = kwds.get("engine_specified", engine_specified)

        _validate_skipfooter(kwds)

        # A dialect, when one is supplied, is merged into the keywords.
        dialect = _extract_dialect(kwds)
        if dialect is not None:
            kwds = _merge_with_dialect_properties(dialect, kwds)

        # header="infer" (also the fallback when header is absent) becomes
        # row 0 when no column names were given, and None otherwise.
        if kwds.get("header", "infer") == "infer":
            if kwds.get("names") is None:
                kwds["header"] = 0
            else:
                kwds["header"] = None

        self.orig_options = kwds

        # miscellanea
        self._currow = 0

        options = self._get_options_with_defaults(engine)
        options["storage_options"] = kwds.get("storage_options")

        # These three are kept on the reader and removed from the options.
        self.chunksize = options.pop("chunksize", None)
        self.nrows = options.pop("nrows", None)
        self.squeeze = options.pop("squeeze", False)

        self._check_file_or_buffer(f, engine)
        cleaned_options, resolved_engine = self._clean_options(options, engine)
        self.options = cleaned_options
        self.engine = resolved_engine

        if "has_index_names" in kwds:
            self.options["has_index_names"] = kwds["has_index_names"]

        self._engine = self._make_engine(self.engine)
As far as I can find, self.orig_options is only ever used when the _get_options_with_defaults method is called. This method appears to do some more validation of the options to make sure they will work with whatever engine you are telling the reader to use.
def _get_options_with_defaults(self, engine):
    """
    Combine ``self.orig_options`` with the default option tables.

    Parameters
    ----------
    engine : str
        Name of the parser engine the options are being prepared for.

    Returns
    -------
    dict
        One entry per name in ``parser_defaults`` and
        ``_c_parser_defaults`` (plus ``_fwf_defaults`` when ``engine`` is
        ``"python-fwf"``), holding the user's value if given and the
        default otherwise.

    Raises
    ------
    ValueError
        If ``mangle_dupe_cols`` is falsy, or if a C-parser option is set
        to a non-default value that ``engine`` does not accept.
    """
    supplied = self.orig_options
    resolved = {}

    for argname, default in parser_defaults.items():
        chosen = supplied.get(argname, default)

        # see gh-12935
        if argname == "mangle_dupe_cols" and not chosen:
            raise ValueError("Setting mangle_dupe_cols=False is not supported yet")
        resolved[argname] = chosen

    for argname, default in _c_parser_defaults.items():
        if argname not in supplied:
            # Not given by the caller: fall back to the deprecated default
            # if one is registered, else to the C-parser default.
            resolved[argname] = _deprecated_defaults.get(argname, default)
            continue

        chosen = supplied[argname]
        if engine != "c" and chosen != default:
            # A non-default value on a non-C engine is tolerated only if a
            # python engine is not listed as lacking the option, or if the
            # value equals the deprecated default.
            tolerated = (
                "python" in engine and argname not in _python_unsupported
            ) or chosen == _deprecated_defaults.get(argname, default)
            if not tolerated:
                raise ValueError(
                    f"The {repr(argname)} option is not supported with the "
                    f"{repr(engine)} engine"
                )
        resolved[argname] = chosen

    if engine == "python-fwf":
        # Fixed-width options are only filled in for the fwf engine.
        resolved.update(
            {
                fwf_name: supplied.get(fwf_name, fwf_default)
                for fwf_name, fwf_default in _fwf_defaults.items()
            }
        )

    return resolved
If the kwds get past all this validation, they end up in self.options, which is used by the _make_engine method as the arguments to be passed to the parser engine.
def _make_engine(self, engine="c"):
    """
    Instantiate the parser class registered for ``engine``.

    Parameters
    ----------
    engine : str, default "c"
        One of ``"c"``, ``"python"`` or ``"python-fwf"``.

    Returns
    -------
    The selected parser class called with ``self.f`` and the entries of
    ``self.options`` as keyword arguments.

    Raises
    ------
    ValueError
        If ``engine`` is not one of the registered names.
    """
    mapping: Dict[str, Type[ParserBase]] = {
        "c": CParserWrapper,
        "python": PythonParser,
        "python-fwf": FixedWidthFieldParser,
    }

    # No registered value is None, so a None result means "unknown engine".
    parser_cls = mapping.get(engine)
    if parser_cls is None:
        raise ValueError(
            f"Unknown engine: {engine} (valid options are {mapping.keys()})"
        )

    # error: Too many arguments for "ParserBase"
    return parser_cls(self.f, **self.options)
Now as for the question:
What can be passed to TextFileReader?
The answer is highly dependent on which engine you are using and what arguments it supports.