Enable certain memory optimization options when trying to load a large model on a low-end card

pull/18/head
Guillermo Rodriguez 2023-08-14 03:15:45 +00:00
parent ffcf9dc905
commit 8a5e32d452
10 changed files with 196 additions and 73 deletions

36
Dockerfile-cuda 100644
View File

@ -0,0 +1,36 @@
# CUDA worker image: PyTorch 2.0.1 / CUDA 11.7 base with a nested Docker
# daemon (started through the wrapdocker entrypoint) and the skynet package
# installed in editable mode.
FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel

# Keep Debian package tooling from prompting during the build.
ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update -qq && apt-get install -qqy \
        apt-transport-https \
        ca-certificates \
        curl \
        git \
        lxc \
        vim \
        ffmpeg \
        libsm6 \
        libxext6 \
        iptables

# Install the Docker engine with the upstream convenience script.
RUN curl -sSL https://get.docker.com/ | sh

# Entrypoint helper that prepares cgroups and launches dockerd.
# COPY is used instead of ADD: these are plain local files, so ADD's archive
# extraction / URL fetching behaviour is not wanted.
COPY ./wrapdocker /usr/local/bin/wrapdocker
RUN chmod +x /usr/local/bin/wrapdocker

# Storage for the nested Docker daemon lives in a volume.
VOLUME /var/lib/docker

# NOTE(review): this is a relative path, unchanged from the original value;
# presumably it resolves against the process working directory - confirm
# this is intended rather than an absolute path such as /hf_home.
ENV HF_HOME=hf_home

WORKDIR /root/target

COPY ./requirements.cuda.txt requirements.cuda.txt
COPY ./requirements.txt requirements.txt
COPY ./setup.py setup.py
COPY ./skynet skynet

RUN pip install -r requirements.cuda.txt
RUN pip install -r requirements.txt
RUN pip install -e .

CMD ["wrapdocker"]

View File

@ -1,16 +0,0 @@
# Image built on the official CPython 3.10.0 base that installs the project
# and copies in its tests.
from python:3.10.0
# Keep Debian package tooling from prompting during the build.
env DEBIAN_FRONTEND=noninteractive
workdir /skynet
# Copy the packaging metadata and sources needed by the editable install.
copy requirements.txt requirements.txt
copy pytest.ini ./
copy setup.py ./
copy skynet ./skynet
# Install the project in editable mode together with its requirements.
run pip install \
-e . \
-r requirements.txt
# NOTE: COPY of a directory copies the directory's contents, so the test
# files land directly in /skynet rather than in /skynet/tests.
copy tests ./

View File

@ -1,29 +0,0 @@
# NOTE(review): every FROM starts a new build stage and only the last stage
# ends up in the final image, so this image is based on python:3.11 and
# nothing from the nvidia/cuda stage is carried over.
from nvidia/cuda:11.7.0-devel-ubuntu20.04
from python:3.11
# Keep Debian package tooling from prompting during the build.
env DEBIAN_FRONTEND=noninteractive
run apt-get update && \
apt-get install -y ffmpeg libsm6 libxext6
workdir /skynet
# The wildcard copies every requirements.cuda* file into the workdir.
copy requirements.cuda* ./
run pip install -U pip ninja
# CUDA requirements are installed in three separate steps (0, 1, 2), each
# producing its own image layer.
run pip install -v -r requirements.cuda.0.txt
run pip install -v -r requirements.cuda.1.txt
run pip install -v -r requirements.cuda.2.txt
copy requirements.txt requirements.txt
copy pytest.ini pytest.ini
copy setup.py setup.py
copy skynet skynet
# Install the project in editable mode together with its requirements.
run pip install -e . -r requirements.txt
# Runtime environment: PyTorch CUDA allocator configuration, GPU visibility
# for the NVIDIA container runtime, and the Hugging Face home directory.
env PYTORCH_CUDA_ALLOC_CONF max_split_size_mb:128
env NVIDIA_VISIBLE_DEVICES=all
env HF_HOME /hf_home
copy tests tests

View File

@ -1 +0,0 @@
git+https://github.com/facebookresearch/xformers.git@main#egg=xformers

View File

@ -1,2 +0,0 @@
basicsr
realesrgan

View File

@ -1,9 +1,14 @@
torch
scipy scipy
triton triton
xformers
accelerate accelerate
transformers transformers
huggingface_hub huggingface_hub
diffusers[torch]>=0.18.0 diffusers[torch]
invisible-watermark invisible-watermark
torch==1.13.0+cu117
basicsr
realesrgan
--extra-index-url https://download.pytorch.org/whl/cu117 --extra-index-url https://download.pytorch.org/whl/cu117

View File

@ -5,18 +5,18 @@ VERSION = '0.1a11'
DOCKER_RUNTIME_CUDA = 'skynet:runtime-cuda' DOCKER_RUNTIME_CUDA = 'skynet:runtime-cuda'
MODELS = { MODELS = {
'prompthero/openjourney': { 'short': 'midj'}, 'prompthero/openjourney': { 'short': 'midj', 'mem': 8 },
'runwayml/stable-diffusion-v1-5': { 'short': 'stable'}, 'runwayml/stable-diffusion-v1-5': { 'short': 'stable', 'mem': 8 },
'stabilityai/stable-diffusion-2-1-base': { 'short': 'stable2'}, 'stabilityai/stable-diffusion-2-1-base': { 'short': 'stable2', 'mem': 8 },
'snowkidy/stable-diffusion-xl-base-0.9': { 'short': 'stablexl0.9'}, 'snowkidy/stable-diffusion-xl-base-0.9': { 'short': 'stablexl0.9', 'mem': 24 },
'stabilityai/stable-diffusion-xl-base-1.0': { 'short': 'stablexl'}, 'stabilityai/stable-diffusion-xl-base-1.0': { 'short': 'stablexl', 'mem': 24 },
'Linaqruf/anything-v3.0': { 'short': 'hdanime'}, 'Linaqruf/anything-v3.0': { 'short': 'hdanime', 'mem': 8 },
'hakurei/waifu-diffusion': { 'short': 'waifu'}, 'hakurei/waifu-diffusion': { 'short': 'waifu', 'mem': 8 },
'nitrosocke/Ghibli-Diffusion': { 'short': 'ghibli'}, 'nitrosocke/Ghibli-Diffusion': { 'short': 'ghibli', 'mem': 8 },
'dallinmackay/Van-Gogh-diffusion': { 'short': 'van-gogh'}, 'dallinmackay/Van-Gogh-diffusion': { 'short': 'van-gogh', 'mem': 8 },
'lambdalabs/sd-pokemon-diffusers': { 'short': 'pokemon'}, 'lambdalabs/sd-pokemon-diffusers': { 'short': 'pokemon', 'mem': 8 },
'Envvi/Inkpunk-Diffusion': { 'short': 'ink'}, 'Envvi/Inkpunk-Diffusion': { 'short': 'ink', 'mem': 8 },
'nousr/robo-diffusion': { 'short': 'robot'} 'nousr/robo-diffusion': { 'short': 'robot', 'mem': 8 }
} }
SHORT_NAMES = [ SHORT_NAMES = [
@ -153,10 +153,11 @@ DEFAULT_UPSCALER = None
DEFAULT_CONFIG_PATH = 'skynet.ini' DEFAULT_CONFIG_PATH = 'skynet.ini'
DEFAULT_INITAL_MODELS = [ DEFAULT_INITAL_MODELS = [
'prompthero/openjourney', 'stabilityai/stable-diffusion-xl-base-1.0'
'runwayml/stable-diffusion-v1-5'
] ]
DEFAULT_SINGLE_CARD_MAP = 'cuda:0'
DATE_FORMAT = '%B the %dth %Y, %H:%M:%S' DATE_FORMAT = '%B the %dth %Y, %H:%M:%S'
CONFIG_ATTRS = [ CONFIG_ATTRS = [

View File

@ -83,9 +83,10 @@ def open_ipfs_node(name='skynet-ipfs'):
remove=True remove=True
) )
uid = 1000
gid = 1000
if sys.platform != 'win32': if sys.platform != 'win32':
uid = os.getuid()
gid = os.getgid()
ec, out = container.exec_run(['chown', f'{uid}:{gid}', '-R', export_target]) ec, out = container.exec_run(['chown', f'{uid}:{gid}', '-R', export_target])
logging.info(out) logging.info(out)
assert ec == 0 assert ec == 0

View File

@ -4,6 +4,7 @@ import io
import os import os
import time import time
import random import random
import logging
from typing import Optional from typing import Optional
from pathlib import Path from pathlib import Path
@ -24,7 +25,7 @@ from diffusers import (
from realesrgan import RealESRGANer from realesrgan import RealESRGANer
from huggingface_hub import login from huggingface_hub import login
from .constants import MODELS from .constants import MODELS, DEFAULT_SINGLE_CARD_MAP
def time_ms(): def time_ms():
@ -74,16 +75,24 @@ def pipeline_for(model: str, mem_fraction: float = 1.0, image=False) -> Diffusio
torch.backends.cudnn.benchmark = False torch.backends.cudnn.benchmark = False
torch.use_deterministic_algorithms(True) torch.use_deterministic_algorithms(True)
model_info = MODELS[model]
req_mem = model_info['mem']
mem_gb = torch.cuda.mem_get_info()[1] / (10**9)
over_mem = mem_gb < req_mem
if over_mem:
logging.warn(f'model requires {req_mem} but card has {mem_gb}, model will run slower..')
shortname = model_info['short']
params = { params = {
'torch_dtype': torch.float16, 'torch_dtype': torch.float16,
'safety_checker': None 'safety_checker': None
} }
if model == 'runwayml/stable-diffusion-v1-5': if shortname == 'stable':
params['revision'] = 'fp16' params['revision'] = 'fp16'
if (model == 'stabilityai/stable-diffusion-xl-base-1.0' or if 'xl' in shortname:
model == 'snowkidy/stable-diffusion-xl-base-0.9'):
if image: if image:
pipe_class = StableDiffusionXLImg2ImgPipeline pipe_class = StableDiffusionXLImg2ImgPipeline
else: else:
@ -100,10 +109,16 @@ def pipeline_for(model: str, mem_fraction: float = 1.0, image=False) -> Diffusio
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config( pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
pipe.scheduler.config) pipe.scheduler.config)
if over_mem:
if not image: if not image:
pipe.enable_vae_slicing() pipe.enable_vae_slicing()
pipe.enable_vae_tiling()
return pipe.to('cuda') pipe.enable_model_cpu_offload()
pipe.enable_xformers_memory_efficient_attention()
return pipe
def txt2img( def txt2img(

113
wrapdocker 100755
View File

@ -0,0 +1,113 @@
#!/bin/bash
# wrapdocker: set up cgroup mounts and start a Docker daemon inside a
# container.

# Ensure that all nodes in /dev/mapper correspond to mapped devices currently
# loaded by the device-mapper kernel driver.
dmsetup mknodes

# First, make sure that cgroups are mounted correctly.
CGROUP=/sys/fs/cgroup

# Default LOG to "stdio" when the caller did not provide it. The ":" builtin
# ignores its arguments, so the expansion is evaluated only for its
# assignment side effect; without the leading "$" nothing is assigned.
: "${LOG:=stdio}"

[ -d "$CGROUP" ] || mkdir "$CGROUP"

# Mount a tmpfs on the cgroup root unless something is already mounted
# there; failing to do so is fatal.
mountpoint -q "$CGROUP" ||
    mount -n -t tmpfs -o uid=0,gid=0,mode=0755 cgroup "$CGROUP" || {
        echo "Could not make a tmpfs mount. Did you use --privileged?"
        exit 1
    }
# Mount securityfs when the directory exists but nothing is mounted on it
# yet. A failed mount is reported but does not abort the script.
if [ -d /sys/kernel/security ] && ! mountpoint -q /sys/kernel/security
then
    if ! mount -t securityfs none /sys/kernel/security
    then
        echo "Could not mount /sys/kernel/security."
        echo "AppArmor detection and --privileged mode might break."
    fi
fi
# Mount the cgroup hierarchies exactly as they are in the parent system.
# The hierarchy list is taken from the second ":"-separated field of
# /proc/1/cgroup.
for SUBSYS in $(cut -d: -f2 /proc/1/cgroup)
do
    if [ ! -d $CGROUP/$SUBSYS ]; then
        mkdir $CGROUP/$SUBSYS
    fi
    if ! mountpoint -q $CGROUP/$SUBSYS; then
        mount -n -t cgroup -o $SUBSYS cgroup $CGROUP/$SUBSYS
    fi

    # The two workarounds below address a bug which manifests itself by a
    # cryptic "lxc-start: no ns_cgroup option specified" when trying to
    # start containers within a container. The bug seems to appear when the
    # cgroup hierarchies are not mounted on the exact same directories in
    # the host and in the container.

    # Named, control-less cgroups are mounted with "-o name=foo" (and appear
    # as such under /proc/<pid>/cgroup) but are usually mounted on a
    # directory named "foo" (without the "name=" prefix). Systemd and OpenRC
    # (and possibly others) both create such a cgroup. To avoid the bug
    # described above, symlink "foo" to "name=foo". This shouldn't have any
    # adverse effect.
    case $SUBSYS in
        name=*)
            NAME=${SUBSYS#name=}
            ln -s $SUBSYS $CGROUP/$NAME
            ;;
    esac

    # Likewise, on at least one system, it has been reported that systemd
    # would mount the CPU and CPU accounting controllers (respectively "cpu"
    # and "cpuacct") with "-o cpuacct,cpu" but on a directory called
    # "cpu,cpuacct" (note the inverted order). Provide that name as well.
    if [ $SUBSYS = cpuacct,cpu ]; then
        ln -s $SUBSYS $CGROUP/cpu,cpuacct
    fi
done
# Note: at the time these lines were written, the LXC userland tools could
# not set up a "sub-container" properly if the "devices" cgroup was not in
# its own hierarchy. Detect that situation and warn about it.
if ! grep -q :devices: /proc/1/cgroup; then
    echo "WARNING: the 'devices' cgroup should be in its own hierarchy."
fi
# Separately warn when "devices" does not show up in /proc/1/cgroup at all.
if ! grep -qw devices /proc/1/cgroup; then
    echo "WARNING: it looks like the 'devices' cgroup is not mounted."
fi
# Now, close extraneous file descriptors: walk the entries of /proc/self/fd
# and close every descriptor except stdin (0), stdout (1) and stderr (2).
pushd /proc/self/fd >/dev/null
for FD in *
do
    if [[ $FD != [012] ]]; then
        # eval is needed so the descriptor number is part of the redirection
        # operator itself rather than a separate word.
        eval exec "$FD>&-"
    fi
done
popd >/dev/null
# A pidfile may be left over (for example after a container restart);
# remove it so that docker can start.
rm -rf /var/run/docker.pid

# With a PORT environment variable, become the daemon directly, listening on
# that TCP port and on the unix socket. exec replaces this shell, so nothing
# below runs in that case.
if [[ -n $PORT ]]; then
    exec dockerd -H 0.0.0.0:$PORT -H unix:///var/run/docker.sock \
        $DOCKER_DAEMON_ARGS
fi

# Otherwise run the daemon in the background and spawn a shell as well.
# LOG=file sends the daemon's output to /var/log/docker.log.
case $LOG in
    file)
        dockerd $DOCKER_DAEMON_ARGS &>/var/log/docker.log &
        ;;
    *)
        dockerd $DOCKER_DAEMON_ARGS &
        ;;
esac

# Poll the daemon once per second for up to 60 seconds. On timeout a message
# is printed and the script carries on regardless.
deadline=$(( SECONDS + 60 ))
while ! docker info >/dev/null 2>&1
do
    if (( SECONDS >= deadline )); then
        echo 'Timed out trying to connect to internal docker host.' >&2
        break
    fi
    sleep 1
done

# Run the command given on the command line, if any; otherwise fall back to
# a login shell.
if [[ -n $1 ]]; then
    exec "$@"
fi
exec bash --login