Enable certain memory optimization options in cases where a large model is loaded on a low-end card

pull/18/head
Guillermo Rodriguez 2023-08-14 03:15:45 +00:00
parent ffcf9dc905
commit 8a5e32d452
10 changed files with 196 additions and 73 deletions

36
Dockerfile-cuda 100644
View File

@ -0,0 +1,36 @@
# CUDA worker image: starts from the PyTorch CUDA 11.7 devel image, installs a
# Docker engine inside it (started through the wrapdocker entry point), and
# installs the skynet package in editable mode.
from pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel

# Keep apt from prompting during the build.
env DEBIAN_FRONTEND=noninteractive

run apt-get update -qq && apt-get install -qqy \
    apt-transport-https \
    ca-certificates \
    curl \
    git \
    lxc \
    vim \
    ffmpeg \
    libsm6 \
    libxext6 \
    iptables

# Install Docker inside the image using the upstream convenience script.
run curl -sSL https://get.docker.com/ | sh

# wrapdocker is the container entry point (see `cmd` below).
# `copy` is used instead of `add`: these are plain local files, so none of
# `add`'s extra behaviors (URL fetch, archive extraction) are wanted.
copy ./wrapdocker /usr/local/bin/wrapdocker
run chmod +x /usr/local/bin/wrapdocker

# Storage directory of the inner Docker daemon, kept in a volume.
volume /var/lib/docker

# NOTE(review): this is a relative path; presumably it resolves against the
# working directory of the running process -- confirm this is intended.
env HF_HOME=hf_home

workdir /root/target

copy ./requirements.cuda.txt requirements.cuda.txt
copy ./requirements.txt requirements.txt
copy ./setup.py setup.py
copy ./skynet skynet

# CUDA-specific requirements first, then the common ones, then the package.
run pip install -r requirements.cuda.txt
run pip install -r requirements.txt
run pip install -e .

cmd ["wrapdocker"]

View File

@ -1,16 +0,0 @@
# Python 3.10 image with the skynet package installed in editable mode
# under /skynet.
from python:3.10.0
# Keep apt/debconf from prompting during the build.
env DEBIAN_FRONTEND=noninteractive
workdir /skynet
copy requirements.txt requirements.txt
copy pytest.ini ./
copy setup.py ./
copy skynet ./skynet
# Install the package in editable mode together with its requirements.
run pip install \
-e . \
-r requirements.txt
# The contents of tests/ are copied directly into /skynet (not into /skynet/tests).
copy tests ./

View File

@ -1,29 +0,0 @@
# CUDA-oriented image that installs the staged requirements.cuda.*.txt files
# and then the skynet package in editable mode under /skynet.
# NOTE(review): a second `from` starts a new build stage, so the final image
# is based on python:3.11 and nothing from the nvidia/cuda stage is carried
# over -- confirm whether that was intended.
from nvidia/cuda:11.7.0-devel-ubuntu20.04
from python:3.11
# Keep apt/debconf from prompting during the build.
env DEBIAN_FRONTEND=noninteractive
run apt-get update && \
apt-get install -y ffmpeg libsm6 libxext6
workdir /skynet
# Copies every file matching requirements.cuda* into the working directory.
copy requirements.cuda* ./
run pip install -U pip ninja
# The CUDA requirements are installed in three separate steps, in order.
run pip install -v -r requirements.cuda.0.txt
run pip install -v -r requirements.cuda.1.txt
run pip install -v -r requirements.cuda.2.txt
copy requirements.txt requirements.txt
copy pytest.ini pytest.ini
copy setup.py setup.py
copy skynet skynet
run pip install -e . -r requirements.txt
# Environment passed to the running container: PyTorch CUDA allocator
# setting, GPU visibility, and the HF_HOME directory.
env PYTORCH_CUDA_ALLOC_CONF max_split_size_mb:128
env NVIDIA_VISIBLE_DEVICES=all
env HF_HOME /hf_home
copy tests tests

View File

@ -1 +0,0 @@
git+https://github.com/facebookresearch/xformers.git@main#egg=xformers

View File

@ -1,2 +0,0 @@
basicsr
realesrgan

View File

@ -1,9 +1,14 @@
torch
scipy
triton
xformers
accelerate
transformers
huggingface_hub
diffusers[torch]>=0.18.0
diffusers[torch]
invisible-watermark
torch==1.13.0+cu117
basicsr
realesrgan
--extra-index-url https://download.pytorch.org/whl/cu117

View File

@ -5,18 +5,18 @@ VERSION = '0.1a11'
DOCKER_RUNTIME_CUDA = 'skynet:runtime-cuda'
MODELS = {
'prompthero/openjourney': { 'short': 'midj'},
'runwayml/stable-diffusion-v1-5': { 'short': 'stable'},
'stabilityai/stable-diffusion-2-1-base': { 'short': 'stable2'},
'snowkidy/stable-diffusion-xl-base-0.9': { 'short': 'stablexl0.9'},
'stabilityai/stable-diffusion-xl-base-1.0': { 'short': 'stablexl'},
'Linaqruf/anything-v3.0': { 'short': 'hdanime'},
'hakurei/waifu-diffusion': { 'short': 'waifu'},
'nitrosocke/Ghibli-Diffusion': { 'short': 'ghibli'},
'dallinmackay/Van-Gogh-diffusion': { 'short': 'van-gogh'},
'lambdalabs/sd-pokemon-diffusers': { 'short': 'pokemon'},
'Envvi/Inkpunk-Diffusion': { 'short': 'ink'},
'nousr/robo-diffusion': { 'short': 'robot'}
'prompthero/openjourney': { 'short': 'midj', 'mem': 8 },
'runwayml/stable-diffusion-v1-5': { 'short': 'stable', 'mem': 8 },
'stabilityai/stable-diffusion-2-1-base': { 'short': 'stable2', 'mem': 8 },
'snowkidy/stable-diffusion-xl-base-0.9': { 'short': 'stablexl0.9', 'mem': 24 },
'stabilityai/stable-diffusion-xl-base-1.0': { 'short': 'stablexl', 'mem': 24 },
'Linaqruf/anything-v3.0': { 'short': 'hdanime', 'mem': 8 },
'hakurei/waifu-diffusion': { 'short': 'waifu', 'mem': 8 },
'nitrosocke/Ghibli-Diffusion': { 'short': 'ghibli', 'mem': 8 },
'dallinmackay/Van-Gogh-diffusion': { 'short': 'van-gogh', 'mem': 8 },
'lambdalabs/sd-pokemon-diffusers': { 'short': 'pokemon', 'mem': 8 },
'Envvi/Inkpunk-Diffusion': { 'short': 'ink', 'mem': 8 },
'nousr/robo-diffusion': { 'short': 'robot', 'mem': 8 }
}
SHORT_NAMES = [
@ -153,10 +153,11 @@ DEFAULT_UPSCALER = None
DEFAULT_CONFIG_PATH = 'skynet.ini'
DEFAULT_INITAL_MODELS = [
'prompthero/openjourney',
'runwayml/stable-diffusion-v1-5'
'stabilityai/stable-diffusion-xl-base-1.0'
]
DEFAULT_SINGLE_CARD_MAP = 'cuda:0'
DATE_FORMAT = '%B the %dth %Y, %H:%M:%S'
CONFIG_ATTRS = [

View File

@ -83,9 +83,10 @@ def open_ipfs_node(name='skynet-ipfs'):
remove=True
)
uid = 1000
gid = 1000
if sys.platform != 'win32':
uid = os.getuid()
gid = os.getgid()
ec, out = container.exec_run(['chown', f'{uid}:{gid}', '-R', export_target])
logging.info(out)
assert ec == 0

View File

@ -4,6 +4,7 @@ import io
import os
import time
import random
import logging
from typing import Optional
from pathlib import Path
@ -24,7 +25,7 @@ from diffusers import (
from realesrgan import RealESRGANer
from huggingface_hub import login
from .constants import MODELS
from .constants import MODELS, DEFAULT_SINGLE_CARD_MAP
def time_ms():
@ -74,16 +75,24 @@ def pipeline_for(model: str, mem_fraction: float = 1.0, image=False) -> Diffusio
torch.backends.cudnn.benchmark = False
torch.use_deterministic_algorithms(True)
model_info = MODELS[model]
req_mem = model_info['mem']
mem_gb = torch.cuda.mem_get_info()[1] / (10**9)
over_mem = mem_gb < req_mem
if over_mem:
logging.warn(f'model requires {req_mem} but card has {mem_gb}, model will run slower..')
shortname = model_info['short']
params = {
'torch_dtype': torch.float16,
'safety_checker': None
}
if model == 'runwayml/stable-diffusion-v1-5':
if shortname == 'stable':
params['revision'] = 'fp16'
if (model == 'stabilityai/stable-diffusion-xl-base-1.0' or
model == 'snowkidy/stable-diffusion-xl-base-0.9'):
if 'xl' in shortname:
if image:
pipe_class = StableDiffusionXLImg2ImgPipeline
else:
@ -100,10 +109,16 @@ def pipeline_for(model: str, mem_fraction: float = 1.0, image=False) -> Diffusio
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
pipe.scheduler.config)
if over_mem:
if not image:
pipe.enable_vae_slicing()
pipe.enable_vae_tiling()
return pipe.to('cuda')
pipe.enable_model_cpu_offload()
pipe.enable_xformers_memory_efficient_attention()
return pipe
def txt2img(

113
wrapdocker 100755
View File

@ -0,0 +1,113 @@
#!/bin/bash
# wrapdocker: entry point for running a Docker daemon inside a container.
#
# Steps: mount the cgroup hierarchies, close stray file descriptors, remove a
# stale pidfile, then start dockerd.
#
# Environment:
#   PORT               if set, dockerd is exec'd in the foreground, listening
#                      on 0.0.0.0:$PORT and on the unix socket.
#   LOG                "file" sends dockerd output to /var/log/docker.log;
#                      any other value (default "stdio") leaves it on the
#                      script's stdout/stderr.
#   DOCKER_DAEMON_ARGS extra arguments passed to dockerd (word-split on purpose).
#
# Without PORT, dockerd is started in the background, the script waits up to
# 60 seconds for `docker info` to succeed, and then execs the given command
# line, or a login shell when none was given.

# Ensure that all nodes in /dev/mapper correspond to mapped devices currently loaded by the device-mapper kernel driver
dmsetup mknodes

# First, make sure that cgroups are mounted correctly.
CGROUP=/sys/fs/cgroup
# Default LOG to "stdio" when it is unset or empty. (The previous form,
# ": {LOG:=stdio}", lacked the "$" and therefore never assigned anything.)
: "${LOG:=stdio}"

[ -d $CGROUP ] ||
    mkdir $CGROUP

mountpoint -q $CGROUP ||
    mount -n -t tmpfs -o uid=0,gid=0,mode=0755 cgroup $CGROUP || {
        echo "Could not make a tmpfs mount. Did you use --privileged?"
        exit 1
    }

if [ -d /sys/kernel/security ] && ! mountpoint -q /sys/kernel/security
then
    mount -t securityfs none /sys/kernel/security || {
        echo "Could not mount /sys/kernel/security."
        echo "AppArmor detection and --privileged mode might break."
    }
fi

# Mount the cgroup hierarchies exactly as they are in the parent system.
for SUBSYS in $(cut -d: -f2 /proc/1/cgroup)
do
    [ -d $CGROUP/$SUBSYS ] || mkdir $CGROUP/$SUBSYS
    mountpoint -q $CGROUP/$SUBSYS ||
        mount -n -t cgroup -o $SUBSYS cgroup $CGROUP/$SUBSYS

    # The two following sections address a bug which manifests itself
    # by a cryptic "lxc-start: no ns_cgroup option specified" when
    # trying to start containers within a container.
    # The bug seems to appear when the cgroup hierarchies are not
    # mounted on the exact same directories in the host, and in the
    # container.

    # Named, control-less cgroups are mounted with "-o name=foo"
    # (and appear as such under /proc/<pid>/cgroup) but are usually
    # mounted on a directory named "foo" (without the "name=" prefix).
    # Systemd and OpenRC (and possibly others) both create such a
    # cgroup. To avoid the aforementioned bug, we symlink "foo" to
    # "name=foo". This shouldn't have any adverse effect.
    echo $SUBSYS | grep -q ^name= && {
        NAME=$(echo $SUBSYS | sed s/^name=//)
        ln -s $SUBSYS $CGROUP/$NAME
    }

    # Likewise, on at least one system, it has been reported that
    # systemd would mount the CPU and CPU accounting controllers
    # (respectively "cpu" and "cpuacct") with "-o cpuacct,cpu"
    # but on a directory called "cpu,cpuacct" (note the inversion
    # in the order of the groups). This tries to work around it.
    [ $SUBSYS = cpuacct,cpu ] && ln -s $SUBSYS $CGROUP/cpu,cpuacct
done

# Note: as I write those lines, the LXC userland tools cannot setup
# a "sub-container" properly if the "devices" cgroup is not in its
# own hierarchy. Let's detect this and issue a warning.
grep -q :devices: /proc/1/cgroup ||
    echo "WARNING: the 'devices' cgroup should be in its own hierarchy."
grep -qw devices /proc/1/cgroup ||
    echo "WARNING: it looks like the 'devices' cgroup is not mounted."

# Now, close extraneous file descriptors.
pushd /proc/self/fd >/dev/null
for FD in *
do
    case "$FD" in
    # Keep stdin/stdout/stderr
    [012])
        ;;
    # Nuke everything else
    *)
        eval exec "$FD>&-"
        ;;
    esac
done
popd >/dev/null

# If a pidfile is still around (for example after a container restart),
# delete it so that docker can start.
rm -rf /var/run/docker.pid

# If we were given a PORT environment variable, start as a simple daemon;
# otherwise, spawn a shell as well
if [ "$PORT" ]
then
    exec dockerd -H 0.0.0.0:$PORT -H unix:///var/run/docker.sock \
        $DOCKER_DAEMON_ARGS
else
    if [ "$LOG" == "file" ]
    then
        dockerd $DOCKER_DAEMON_ARGS &>/var/log/docker.log &
    else
        dockerd $DOCKER_DAEMON_ARGS &
    fi
    # Wait for the daemon to answer, giving up after 60 seconds.
    (( timeout = 60 + SECONDS ))
    until docker info >/dev/null 2>&1
    do
        if (( SECONDS >= timeout )); then
            echo 'Timed out trying to connect to internal docker host.' >&2
            break
        fi
        sleep 1
    done
    # Run the command passed as arguments, if any; otherwise a login shell.
    [[ $1 ]] && exec "$@"
    exec bash --login
fi