I've been trying to install PyTorch 1.9 with CUDA (ideally CUDA 11) on my HPC cluster, but I have not been able to.
The cluster says:
Package typing-extensions conflicts for:
typing-extensions
torchvision -> pytorch==1.8.1 -> typing-extensions
The following specifications were found to be incompatible with your system:
- feature:/linux-64::__glibc==2.17=0
- feature:|@/linux-64::__glibc==2.17=0
- cffi -> libgcc-ng[version='>=7.5.0'] -> __glibc[version='>=2.17']
- cudatoolkit=11.0 -> __glibc[version='>=2.17,<3.0.a0']
- cudatoolkit=11.0 -> libgcc-ng[version='>=7.3.0'] -> __glibc[version='>=2.17']
- freetype -> libgcc-ng[version='>=7.3.0'] -> __glibc[version='>=2.17']
- jpeg -> libgcc-ng[version='>=7.5.0'] -> __glibc[version='>=2.17']
- lcms2 -> libgcc-ng[version='>=7.3.0'] -> __glibc[version='>=2.17']
- libffi -> libgcc-ng[version='>=7.3.0'] -> __glibc[version='>=2.17']
- libgcc-ng -> __glibc[version='>=2.17']
- libmklml -> libgcc-ng -> __glibc[version='>=2.17']
- libpng -> libgcc-ng[version='>=7.3.0'] -> __glibc[version='>=2.17']
- libstdcxx-ng -> __glibc[version='>=2.17']
- libtiff -> libgcc-ng[version='>=7.3.0'] -> __glibc[version='>=2.17']
- libwebp-base -> libgcc-ng[version='>=7.3.0'] -> __glibc[version='>=2.17']
- lz4-c -> libgcc-ng[version='>=7.5.0'] -> __glibc[version='>=2.17']
- mkl-service -> libgcc-ng[version='>=7.5.0'] -> __glibc[version='>=2.17']
- mkl_fft -> libgcc-ng[version='>=7.3.0'] -> __glibc[version='>=2.17']
- mkl_random -> libgcc-ng[version='>=7.5.0'] -> __glibc[version='>=2.17']
- ncurses -> libgcc-ng[version='>=7.3.0'] -> __glibc[version='>=2.17']
- ninja -> libgcc-ng[version='>=7.3.0'] -> __glibc[version='>=2.17']
- numpy -> libgcc-ng[version='>=7.5.0'] -> __glibc[version='>=2.17']
- numpy-base -> libgcc-ng[version='>=7.5.0'] -> __glibc[version='>=2.17']
- openjpeg -> libgcc-ng[version='>=7.5.0'] -> __glibc[version='>=2.17']
- openssl -> libgcc-ng[version='>=7.5.0'] -> __glibc[version='>=2.17']
- pillow -> libgcc-ng[version='>=7.5.0'] -> __glibc[version='>=2.17']
- python=3.9 -> libgcc-ng[version='>=7.5.0'] -> __glibc[version='>=2.17']
- pytorch==1.9 -> cudatoolkit[version='>=11.1,<11.2'] -> __glibc[version='>=2.17,<3.0.a0']
- readline -> libgcc-ng[version='>=7.3.0'] -> __glibc[version='>=2.17']
- sqlite -> libgcc-ng[version='>=7.5.0'] -> __glibc[version='>=2.17']
- tk -> libgcc-ng[version='>=7.3.0'] -> __glibc[version='>=2.17']
- torchvision -> cudatoolkit[version='>=11.1,<11.2'] -> __glibc[version='>=2.17|>=2.17,<3.0.a0']
- xz -> libgcc-ng[version='>=7.3.0'] -> __glibc[version='>=2.17']
- zlib -> libgcc-ng[version='>=7.3.0'] -> __glibc[version='>=2.17']
- zstd -> libgcc-ng[version='>=7.3.0'] -> __glibc[version='>=2.17']
Your installed version is: 2.17
but I don't understand how to use that information to get it installed. Is this something I need to ask the system admins to fix?
When I try to install it with conda, I get a message telling me that it's already installed. However, `conda list | grep torch` shows that the installed build is CPU-only, not GPU:
(metalearning_gpu) miranda9~/automl-meta-learning $ conda install pytorch torchvision torchaudio cudatoolkit=11.1 -c pytorch -c nvidia
Collecting package metadata (current_repodata.json): done
Solving environment: done
# All requested packages already installed.
(metalearning_gpu) miranda9~/automl-meta-learning $
(metalearning_gpu) miranda9~/automl-meta-learning $ conda list | grep torch
cpuonly 1.0 0 pytorch
ffmpeg 4.3 hf484d3e_0 pytorch
pytorch 1.9.0 py3.9_cpu_0 [cpuonly] pytorch
torch 1.9.0+cpu pypi_0 pypi
torchaudio 0.9.0 pypi_0 pypi
torchmeta 1.7.0 pypi_0 pypi
torchvision 0.10.0+cpu pypi_0 pypi
Attempting to install it with pip completely fails:
(metalearning_gpu) miranda9~/automl-meta-learning $ pip3 install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html
Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch==1.9.0+cu111
ERROR: Exception:
Traceback (most recent call last):
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/cli/base_command.py", line 173, in _main
status = self.run(options, args)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/cli/req_command.py", line 203, in wrapper
return func(self, options, args)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/commands/install.py", line 315, in run
requirement_set = resolver.resolve(
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/resolution/resolvelib/resolver.py", line 94, in resolve
result = self._result = resolver.resolve(
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_vendor/resolvelib/resolvers.py", line 472, in resolve
state = resolution.resolve(requirements, max_rounds=max_rounds)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_vendor/resolvelib/resolvers.py", line 341, in resolve
self._add_to_criteria(self.state.criteria, r, parent=None)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_vendor/resolvelib/resolvers.py", line 172, in _add_to_criteria
if not criterion.candidates:
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_vendor/resolvelib/structs.py", line 151, in __bool__
return bool(self._sequence)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/resolution/resolvelib/found_candidates.py", line 140, in __bool__
return any(self)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/resolution/resolvelib/found_candidates.py", line 128, in <genexpr>
return (c for c in iterator if id(c) not in self._incompatible_ids)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/resolution/resolvelib/found_candidates.py", line 32, in _iter_built
candidate = func()
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/resolution/resolvelib/factory.py", line 204, in _make_candidate_from_link
self._link_candidate_cache[link] = LinkCandidate(
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/resolution/resolvelib/candidates.py", line 295, in __init__
super().__init__(
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/resolution/resolvelib/candidates.py", line 156, in __init__
self.dist = self._prepare()
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/resolution/resolvelib/candidates.py", line 227, in _prepare
dist = self._prepare_distribution()
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/resolution/resolvelib/candidates.py", line 305, in _prepare_distribution
return self._factory.preparer.prepare_linked_requirement(
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/operations/prepare.py", line 508, in prepare_linked_requirement
return self._prepare_linked_requirement(req, parallel_builds)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/operations/prepare.py", line 550, in _prepare_linked_requirement
local_file = unpack_url(
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/operations/prepare.py", line 239, in unpack_url
file = get_http_url(
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/operations/prepare.py", line 102, in get_http_url
from_path, content_type = download(link, temp_dir.path)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/network/download.py", line 132, in __call__
resp = _http_get_download(self._session, link)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/network/download.py", line 115, in _http_get_download
resp = session.get(target_url, headers=HEADERS, stream=True)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_vendor/requests/sessions.py", line 555, in get
return self.request('GET', url, **kwargs)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/network/session.py", line 454, in request
return super().request(method, url, *args, **kwargs)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_vendor/requests/sessions.py", line 542, in request
resp = self.send(prep, **send_kwargs)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_vendor/requests/sessions.py", line 655, in send
r = adapter.send(request, **kwargs)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_vendor/cachecontrol/adapter.py", line 44, in send
cached_response = self.controller.cached_request(request)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_vendor/cachecontrol/controller.py", line 139, in cached_request
cache_data = self.cache.get(cache_url)
File "/home/miranda9/miniconda3/envs/metalearning_gpu/lib/python3.9/site-packages/pip/_internal/network/cache.py", line 54, in get
return f.read()
MemoryError
Current install script:
## Installation script
# to install do: bash ~/automl-meta-learning/install.sh
#conda update conda
#conda create -y -n metalearning_gpu python=3.9
#conda activate metalearning_gpu
#conda remove --name metalearning_gpu --all
# Load the cluster's CUDA 11.1 toolkit and a matching compiler.
module load cuda-toolkit/11.1
module load gcc/9.2.0
# A40 GPUs need CUDA >= 11.0; the PyTorch 1.9 GPU builds requested below
# target CUDA 11.1.
# `conda activate` only works after conda's shell hook has been sourced,
# which a non-interactive `bash install.sh` does not do on its own.
source "$(conda info --base)/etc/profile.d/conda.sh"
# Stop here if activation fails, so nothing gets installed into whatever
# environment happens to be active instead.
conda activate metalearning_gpu || {
  echo "could not activate conda env metalearning_gpu" >&2
  exit 1
}
# NOTE(review): if the env already contains the `cpuonly` package, conda has
# been seen to report "All requested packages already installed" and keep
# the CPU build -- presumably `cpuonly` has to be removed first; confirm.
# -y: do not block on the confirmation prompt when run as a script.
conda install -y pytorch torchvision torchaudio cudatoolkit=11.1 -c pytorch -c nvidia
# --no-cache-dir: pip's HTTP cache reads the whole cached wheel into memory,
# which raised MemoryError for the large CUDA wheel; skip the cache entirely.
pip3 install --no-cache-dir torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html
#conda activate metalearning_cpu
#conda install pytorch torchvision torchaudio cpuonly -c pytorch
#pip3 install torch==1.9.0+cpu torchvision==0.10.0+cpu torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html
# uutils dependencies
conda install -y dill
# The version spec must be quoted: unquoted, the shell parses `>=2.5` as a
# redirection of stdout into a file named `=2.5`, and conda only ever sees
# the bare package name `networkx` with no version constraint.
conda install -y 'networkx>=2.5'
conda install -y scipy
conda install -y scikit-learn
conda install -y lark-parser -c conda-forge
# due to compatibility with torch=1.7.