docker run --help | grep -i gpus #查看是否具备--gpus参数
11.4.2-base-centos7
测试命令:
nvidia-smi
#docker-compose.yaml代码
# Compose service that reserves NVIDIA GPU(s) and runs nvidia-smi as a smoke test.
services:
  test:
    image: nvidia/cuda:10.2-base
    command: nvidia-smi
    deploy:
      resources:
        reservations:
          devices:
            # driver, count/device_ids and capabilities belong to ONE list
            # item: only the first key carries the leading "-".
            - driver: nvidia
              # Number of GPUs to reserve: an integer or "all".
              count: 1
              # Alternative to count (choose one of the two): pin GPU IDs.
              # device_ids: ['0', '3']
              # Required. List every capability the container needs,
              # e.g. [gpu] for one or [gpu, utility] for several.
              capabilities: [gpu, utility]
这里的capabilities是必须要指定的,可以是 ["gpu"]、["tpu"]、["nvidia-compute"]
而且count、driver、capabilities是同一组(同一个列表项),不能每个前面都加"-",不然会报错
#安装docker驱动
https://docs.docker.com/config/containers/resource_constraints/#gpu
#安装运行时
apt-get install nvidia-container-runtime
#查找nvidia-container-runtime-hook可执行文件的路径
which nvidia-container-runtime-hook
配置daemon.json
{
  "default-runtime": "nvidia",
  "runtimes": {
    "nvidia": {
      "path": "/usr/bin/nvidia-container-runtime",
      "runtimeArgs": []
    }
  }
}
sudo systemctl daemon-reload
sudo systemctl restart docker
docker-compose gpu配置
https://docs.docker.com/compose/gpu-support/
# Compose service that gets GPU access through a Docker runtime named
# "nvidia" (it must be registered with the Docker daemon).
version: '3.9'
services:
  demo:
    # NOTE(review): confirm this exact tag exists on Docker Hub.
    image: tensorflow/tensorflow:2.2.3-gpu-py3
    runtime: nvidia
    # command: nvidia-smi
    # Dockerfile-style "ENV NAME value" lines are not valid Compose syntax;
    # the same variables are set through the service's environment mapping.
    environment:
      NVIDIA_VISIBLE_DEVICES: all
      NVIDIA_DRIVER_CAPABILITIES: compute,utility
经核查发现是tensorflow-gpu版本和cuda版本没对上
tensorflow-gpu版本1.15.0
原cuda:10.1
更改为10.0后问题解决。