安装CUDA驱动指南

最近更新时间:2020-09-03 10:09:56

CUDA (Compute Unified Device Architecture) 是显卡厂商 NVIDIA 推出的运算平台。 CUDA™ 是一种由 NVIDIA 推出的通用并行计算架构,该架构使 GPU 能够解决复杂的计算问题。 它包含了 CUDA 指令集架构(ISA)以及 GPU 内部的并行计算引擎。 开发人员现在可以使用 C 语言, C++ , FORTRAN 来为 CUDA™ 架构编写程序,所编写出的程序可以在支持 CUDA™ 的处理器上以超高性能运行。 GPU 云服务器采用 NVIDIA 显卡,需要安装 CUDA 开发运行环境。

以 Ubuntu 18.04 为例,可参照以下步骤进行安装。

安装依赖包

  • 登录 GPU 实例,打开管理员权限:

    sudo -i
  • 新建.sh文件并进入文件:

    vi auto_install_kingsoft.sh
  • 按 i 键进入编辑模式,并复制以下代码:
    
    #!/bin/sh

# Print a short help text for this installer to stdout.
# Arguments: none
# Outputs:   the help text; "$0" expands to the name the script was run as.
usage() {
  # The terminating EOF must sit alone at the start of its own line,
  # otherwise the here-document never ends.
  cat << EOF

Auto install nvidia_driver and cuda.

Examples: $0
EOF
}

# Check the OS environment: read the distribution ID from the os-release file
# and store it in the global variable "os", which the main flow reads later.
# Globals:   os (written)
# Arguments: $1 - optional path of the os-release file
#                 (default: /etc/os-release)
# Outputs:   one status line on stdout
# Returns:   exits the script with status 1 when the ID is neither "centos"
#            nor "ubuntu"; only prints a message when the file is missing.
os_check() {
  os_release_file=${1:-/etc/os-release}

  if [ -f "$os_release_file" ]; then
    # Take the value of the exact "ID=" key (not ID_LIKE etc.) and drop any
    # double quotes around it.
    os=$(awk -F= '$1 == "ID" { gsub(/"/, "", $2); print $2 }' "$os_release_file")
    if [ "$os" = "centos" ]; then
      echo "centos OS"
    elif [ "$os" = "ubuntu" ]; then
      echo "ubuntu OS"
    else
      echo "error: unsupported os"
      exit 1
    fi
  else
    echo "can not recognised os."
  fi
}

# Make sure an rpm package is installed, installing it with yum if needed.
# If the installation fails, ask whether the kernel should be upgraded
# instead; every answer path ends the script.
# Globals:   rpm_packages_name, Arg (written)
# Arguments: $1 - package name as it appears in "rpm -qa" output
#                 (e.g. kernel-devel-<kernel version>)
# Outputs:   progress messages on stdout
# Returns:   0 when the package is present or was installed.
#            Exits 0 after a kernel upgrade was requested (reboot needed),
#            exits 1 when the user declines or gives no answer.
check_rpm() {
  rpm_packages_name=$1
  echo "check $rpm_packages_name"

  # Fixed-string match against the installed package list.
  if rpm -qa | grep -q -F -- "$rpm_packages_name"; then
    return 0
  fi

  echo "$rpm_packages_name not installed"
  echo "try to install $rpm_packages_name ..."
  if yum install -y "$rpm_packages_name"; then
    echo "install $rpm_packages_name finished"
    return 0
  fi

  echo "failed to install $rpm_packages_name"
  while true; do
    # Non-canonical mode with a 10 s read timeout (time is in 1/10 s), so an
    # unattended run falls through to the empty-answer branch.
    stty -icanon min 0 time 100
    printf '%s' "Do you want to upgrade kernel?"
    read -r Arg
    case $Arg in
      Y|y|YES|yes)
        yum install -y kernel kernel-headers kernel-devel
        echo "reboot & retry auto istall"
        exit 0
        ;;
      N|n|NO|no)
        echo "please install $rpm_packages_name"
        exit 1
        ;;
      "")
        # Auto-continue: no answer (timeout or end of input).
        exit 1
        ;;
    esac
  done
}

# Write the yum repository definition for the CUDA mirror and refresh the
# yum caches.
# Globals:   cuda_repo_file (written)
# Arguments: $1 - optional path of the repo file
#                 (default: /etc/yum.repos.d/cuda.repo)
# Returns:   status of "yum clean all"
centos_set_nvidia_repo() {
  cuda_repo_file=${1:-/etc/yum.repos.d/cuda.repo}

  # One key per line, as the .repo format requires. The file is overwritten
  # rather than appended to, so running the installer again does not produce
  # a second [cuda] section. \$releasever and \$basearch stay literal: they
  # are expanded by yum, not by the shell.
  cat > "$cuda_repo_file" << EOF
[cuda]
name=cuda
baseurl=http://yum.ksyun.cn/cuda/rhel\$releasever/\$basearch/
gpgcheck=1
gpgkey=http://yum.ksyun.cn/cuda/rhel\$releasever/\$basearch/7fa2af80.pub
EOF

  yum clean all
}

# Delete the CUDA yum repository file, if present, and refresh the yum caches.
# Globals:   cuda_repo_file (written)
# Arguments: $1 - optional path of the repo file
#                 (default: /etc/yum.repos.d/cuda.repo)
centos_remove_nvidia_repo() {
  # The directory is yum.repos.d (the same file centos_set_nvidia_repo
  # writes), and rm has no -y option, so -f is used.
  cuda_repo_file=${1:-/etc/yum.repos.d/cuda.repo}

  if [ -e "$cuda_repo_file" ]; then
    rm -f -- "$cuda_repo_file"
    yum clean all
  fi
}

# Blacklist the nouveau kernel module and, if it is currently loaded, unload
# it and rebuild the initramfs with dracut.
# Globals:   nouveau_conf (written)
# Arguments: $1 - optional path of the modprobe blacklist file
#                 (default: /etc/modprobe.d/blacklist-nouveau.conf)
# Outputs:   progress messages on stdout
centos_disable_nouveau() {
  nouveau_conf=${1:-/etc/modprobe.d/blacklist-nouveau.conf}

  echo "disable nouveau!"
  # An existing blacklist file is left untouched.
  if [ ! -f "$nouveau_conf" ]; then
    echo "blacklist nouveau" > "$nouveau_conf"
    echo "options nouveau modeset=0" >> "$nouveau_conf"
  fi

  if lsmod | grep -q nouveau; then
    rmmod nouveau
    echo "***exec \"dracut --force\" to regenerate the kernel initramfs"
    dracut --force
  fi
}

# Make sure the dkms package is installed; it is pulled from EPEL when absent.
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0 when dkms is present or was installed; exits the script with
#            status 1 when the dkms installation fails.
check_rpm_dkms() {
  echo "check dkms"

  # "Installed" means at least one package whose name starts with "dkms".
  if ! rpm -qa | grep -q "^dkms"; then
    echo "leaking dkms"
    yum -y install epel-release
    if ! yum -y install dkms; then
      echo "failed to install dkms"
      exit 1
    fi
  fi
}

# Prepare a CentOS host for the driver/CUDA installation: gcc, nouveau
# blacklist, kernel headers/devel matching the running kernel, dkms, and the
# CUDA yum repository.
# Globals:   kernel_version, nvidia_repo_name, nvidia_repo_id (written)
# Arguments: none
centos_pre_install_check() {
  # check gcc
  if [ ! -f "/usr/bin/gcc" ]; then
    yum install -y gcc
  fi

  # nouveau
  centos_disable_nouveau

  # check kernel rpms
  kernel_version=$(uname -r)
  check_rpm "kernel-headers-${kernel_version}"
  check_rpm "kernel-devel-${kernel_version}"

  check_rpm_dkms

  # set nvidia repo
  centos_remove_nvidia_repo

  # Collect the ids (first column of "yum repolist") of any CUDA repository
  # that is still configured; a leading status marker and the "/arch" suffix
  # yum may print are stripped.
  nvidia_repo_name=$(yum repolist 2> /dev/null \
    | awk '$1 ~ /cuda/ { id = $1; sub(/^[!*]/, "", id); sub(/\/.*$/, "", id); print id }')
  if [ "x$nvidia_repo_name" != "x" ]; then
    # NOTE(review): the original tested an unset variable ($cuda_repo_name)
    # and ran "yum --disablerepo=[nvidia_repo_name]" with no sub-command, so
    # this branch never did anything. Disabling the leftover repositories is
    # the presumed intent - confirm. yum-config-manager comes from yum-utils
    # and is only used when available.
    if command -v yum-config-manager > /dev/null 2>&1; then
      for nvidia_repo_id in $nvidia_repo_name; do
        yum-config-manager --disable "$nvidia_repo_id" > /dev/null
      done
    else
      echo "warning: yum-config-manager not found, cuda repo not disabled: $nvidia_repo_name" >&2
    fi
    yum clean all
  fi
  centos_set_nvidia_repo
}

# Make sure the linux-headers package for the running kernel is installed.
# Globals:   kernel_version (written)
# Arguments: none
# Outputs:   the apt-get command being run, and an error line on failure
# Returns:   0 when the headers are present or were installed, 1 when
#            apt-get fails.
ubuntu_install_kernel() {
  kernel_version=$(uname -r)

  # Already installed: a linux-headers entry that mentions this exact kernel.
  if dpkg --list | grep linux-headers | grep -q -F -- "$kernel_version"; then
    return 0
  fi

  echo "***exec \"apt-get install -y --allow-unauthenticated linux-headers-$kernel_version\""
  if ! apt-get install -y --allow-unauthenticated "linux-headers-$kernel_version"; then
    echo "INSTALL_ERROR: install linux-headers fail!!!"
    return 1
  fi
}

# Blacklist the nouveau kernel module and, if it is currently loaded, unload
# it and rebuild the initramfs with update-initramfs.
# Globals:   nouveau_conf (written)
# Arguments: $1 - optional path of the modprobe blacklist file
#                 (default: /etc/modprobe.d/blacklist-nouveau.conf)
# Outputs:   a notice on stdout when the initramfs is regenerated
ubuntu_disable_nouveau() {
  nouveau_conf=${1:-/etc/modprobe.d/blacklist-nouveau.conf}

  # An existing blacklist file is left untouched.
  if [ ! -f "$nouveau_conf" ]; then
    echo "blacklist nouveau" > "$nouveau_conf"
    echo "blacklist lbm-nouveau" >> "$nouveau_conf"
    echo "options nouveau modeset=0" >> "$nouveau_conf"
  fi

  if lsmod | grep -q nouveau; then
    rmmod nouveau
    echo "***exec \"update-initramfs -u\" to regenerate the kernel initramfs"
    update-initramfs -u
  fi
}

# Register the CUDA apt repository that matches the running Ubuntu release
# and refresh the package index.
# Globals:   release (written)
# Arguments: none
# Returns:   1 when the Ubuntu release cannot be determined, otherwise the
#            status of "apt-get update".
ubuntu_set_nvidia_repo() {
  # "Release: 18.04" -> "1804". The command substitution is required: without
  # it the variable is never assigned and the repository URLs are wrong.
  release=$(lsb_release -a 2> /dev/null | grep Release | awk '{print $2}' | sed "s/\.//g")
  if [ -z "$release" ]; then
    echo "error: can not detect ubuntu release" >&2
    return 1
  fi

  apt-key adv --fetch-keys "http://apt.ksyun.cn/cuda/ubuntu${release}/x86_64/7fa2af80.pub"
  sudo add-apt-repository "deb http://apt.ksyun.cn/cuda/ubuntu${release}/x86_64/ /"
  apt-get clean
  apt-get update
}

# Restore the apt sources list from its ".cudabak" backup copy, when such a
# backup exists.
# Globals:   apt_sources_list (written)
# Arguments: $1 - optional path of the sources list
#                 (default: /etc/apt/sources.list)
# Outputs:   the placeholder line "to do" on stdout
ubuntu_remove_nvidia_repo() {
  apt_sources_list=${1:-/etc/apt/sources.list}

  echo "to do"

  # Only touch the sources list when there is a backup to restore; deleting
  # it first would leave apt without any sources if the backup is missing.
  # NOTE(review): nothing visible in this script creates the .cudabak file -
  # confirm where the backup is expected to come from.
  if [ -f "${apt_sources_list}.cudabak" ]; then
    cp -f -- "${apt_sources_list}.cudabak" "$apt_sources_list"
  fi
}

# Prepare an Ubuntu host for the driver/CUDA installation: gcc, nouveau
# blacklist, kernel headers and the CUDA apt repository.
# Arguments: none
ubuntu_pre_install_check() {
  # check gcc
  if [ ! -f "/usr/bin/gcc" ]; then
    apt-get install -y gcc
  fi

  # nouveau
  ubuntu_disable_nouveau

  # kernel source
  ubuntu_install_kernel

  # set nvidia repo
  ubuntu_remove_nvidia_repo
  ubuntu_set_nvidia_repo
}

# Install the "cuda" package through apt.
# Arguments: none
# Returns:   status of the apt-get invocation
ubuntu_install() {
  # todo
  sudo apt-get -y install cuda
}

# Install the NVIDIA driver and CUDA packages through yum.
# Arguments: none
# Returns:   status of the second yum invocation
centos_install() {
  # Two separate transactions: driver (dkms flavour) plus toolkit first,
  # then the cuda-drivers meta package.
  sudo yum -y install nvidia-driver-latest-dkms cuda
  sudo yum -y install cuda-drivers
}

# Verify the installation on CentOS by querying the driver with nvidia-smi.
# Arguments: none
# Outputs:   nvidia-smi output, plus two hint lines on failure
# Returns:   0 on success; exits the script with status 1 when nvidia-smi
#            fails.
centos_check_install() {
  if ! nvidia-smi -q; then
    echo "exec nvidia-smi failed !"
    echo "try to reboot"
    exit 1
  fi
}

# Verify the installation on Ubuntu by querying the driver with nvidia-smi.
# On failure, look for retpoline complaints about the nvidia module in the
# kernel log and print a hint when there are any.
# Globals:   retpoline_errors (written)
# Arguments: none
# Returns:   0 on success; exits the script with status 1 when nvidia-smi
#            fails.
ubuntu_check_install() {
  if ! nvidia-smi -q; then
    # Count kernel-log lines that mention both nvidia and retpoline. The
    # command substitution is required to capture the count.
    retpoline_errors=$(dmesg | grep nvidia | grep -c retpoline)
    if [ "${retpoline_errors}" -ne 0 ]; then
      echo "retpoline_errors! "
      echo "Please downgrade kernel (< 4.4.0-116-generic) or rebuild driver with gcc support RETPOLINE!"
    fi
    exit 1
  fi
}

# Turn on NVIDIA persistence mode ("nvidia-smi -pm 1").
# Arguments: none
# Returns:   status of nvidia-smi
nvidia_enable_pm() {
  # The closing brace needs its own line (or a preceding ";"); otherwise it
  # is passed to nvidia-smi as an argument and the function never ends.
  nvidia-smi -pm 1
}

# Add the CUDA binary and library directories to PATH / LD_LIBRARY_PATH by
# appending export lines to a shell start-up file.
# Globals:   env_file, env_path, env_library, env1, env2, env_line (written)
# Arguments: $1 - optional start-up file to extend (default: /root/.bashrc)
set_env() {
  env_file=${1:-/root/.bashrc}

  env_path="/usr/local/bin:/usr/local/cuda/bin:"
  env_library="/usr/local/lib:/usr/local/cuda/lib64:"
  # \$PATH / \$LD_LIBRARY_PATH stay literal so they are expanded when the
  # start-up file is sourced, not now.
  env1="export PATH=${env_path}\$PATH"
  env2="export LD_LIBRARY_PATH=${env_library}\$LD_LIBRARY_PATH"

  # Append each line only if it is not already present, so re-running the
  # installer does not pile up duplicate exports.
  for env_line in "$env1" "$env2"; do
    if ! grep -q -x -F -- "$env_line" "$env_file" 2> /dev/null; then
      printf '%s\n' "$env_line" >> "$env_file"
    fi
  done
}

# ---- main flow ----
# The installer takes no arguments; anything on the command line prints the
# help text and aborts.
if [ $# -ne 0 ]; then
  usage
  exit 1
fi

# Sets the global "os" used for the dispatch below.
os_check

if [ "$os" = "centos" ]; then
  centos_pre_install_check
  centos_install
  centos_check_install
  set_env
  nvidia_enable_pm
elif [ "$os" = "ubuntu" ]; then
  ubuntu_pre_install_check
  ubuntu_install
  ubuntu_check_install
  set_env
  nvidia_enable_pm
else
  echo "OS not supported!"
  exit 1
fi

exit 0

按 Esc 键退出编辑模式,输入 :wq 保存并退出。

- 运行.sh文件:

sh auto_install_kingsoft.sh


### 下载驱动
1. 登录GPU实例,进行[CUDA驱动下载](https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=x86_64&target_distro=Ubuntu&target_version=1604&target_type=runfilelocal)或复制链接 https://developer.nvidia.com/cuda-downloads

2. 选择与自己的操作系统相匹配的安装包。以 Ubuntu 16.04 64 位为例,可按如下方式进行选择:  

    ![cuda.png](http://fe-frame.ks3-cn-beijing.ksyun.com/project/cms/72d6a0ae95c232d34eeadc8a6e4fdd68)

    **注意:** 
    - Installer Type 这里推荐选择 runfile(local)。
    - network:网络安装包,安装包较小,需要在主机内联网下载实际的安装包。
    - local:本地安装包。安装包较大,包含每一个下载安装组件的安装包。

3. 点击【Download】,选择下载存放地址:

    ![cuda1.png](http://fe-frame.ks3-cn-beijing.ksyun.com/project/cms/dc4855c12239f51668d189ce31587487)

4. 切换到CUDA安装包所在的目录,执行以下命令:

    ```
    sudo sh cuda_9.1.85_387.26_linux.run
    ```
根据提示依次选择 accept、yes,并按 Enter 键确认。

**注意:**
- 若执行后出现如下结果:

    ```
    Driver: Installed require reboot    
    Toolkit: install skip  
    Samples: install skip  
    ```
    说明这个CUDA安装包包含了Driver,Toolkit和Samples三部分,但此次安装时只把驱动装上了。

    此时需重新安装,再次执行以下命令:
    ```
    sudo sh cuda_9.1.85_387.26_linux.run
    ```

- CUDA安装成功结果如下:  

    ```
    Driver: Installed   
    Toolkit: Installed in /usr/local/cuda   
    Samples: Installed in /home/XX   
    ```
5. 在 /usr/local/cuda/samples/1_Utilities/deviceQuery 目录下执行 make 命令,编译出 deviceQuery 程序。运行 deviceQuery,若能正常显示设备信息,即可认为 CUDA 安装正确。

    安装CUDA驱动指南

金山云,开启您的云计算之旅

免费注册