mirror of https://github.com/skygpu/skynet.git
Enable certain memory optimization options in cases where a large model is loaded on a low-end card
parent
ffcf9dc905
commit
8a5e32d452
|
@ -0,0 +1,36 @@
|
||||||
|
from pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel
|
||||||
|
env DEBIAN_FRONTEND=noninteractive
|
||||||
|
|
||||||
|
run apt-get update -qq && apt-get install -qqy \
|
||||||
|
apt-transport-https \
|
||||||
|
ca-certificates \
|
||||||
|
curl \
|
||||||
|
git \
|
||||||
|
lxc \
|
||||||
|
vim \
|
||||||
|
ffmpeg \
|
||||||
|
libsm6 \
|
||||||
|
libxext6 \
|
||||||
|
iptables
|
||||||
|
|
||||||
|
run curl -sSL https://get.docker.com/ | sh
|
||||||
|
|
||||||
|
add ./wrapdocker /usr/local/bin/wrapdocker
|
||||||
|
run chmod +x /usr/local/bin/wrapdocker
|
||||||
|
|
||||||
|
volume /var/lib/docker
|
||||||
|
|
||||||
|
env HF_HOME hf_home
|
||||||
|
|
||||||
|
workdir /root/target
|
||||||
|
|
||||||
|
add ./requirements.cuda.txt requirements.cuda.txt
|
||||||
|
add ./requirements.txt requirements.txt
|
||||||
|
add ./setup.py setup.py
|
||||||
|
add ./skynet skynet
|
||||||
|
|
||||||
|
run pip install -r requirements.cuda.txt
|
||||||
|
run pip install -r requirements.txt
|
||||||
|
run pip install -e .
|
||||||
|
|
||||||
|
cmd ["wrapdocker"]
|
|
@ -1,16 +0,0 @@
|
||||||
from python:3.10.0
|
|
||||||
|
|
||||||
env DEBIAN_FRONTEND=noninteractive
|
|
||||||
|
|
||||||
workdir /skynet
|
|
||||||
|
|
||||||
copy requirements.txt requirements.txt
|
|
||||||
copy pytest.ini ./
|
|
||||||
copy setup.py ./
|
|
||||||
copy skynet ./skynet
|
|
||||||
|
|
||||||
run pip install \
|
|
||||||
-e . \
|
|
||||||
-r requirements.txt
|
|
||||||
|
|
||||||
copy tests ./
|
|
|
@ -1,29 +0,0 @@
|
||||||
from nvidia/cuda:11.7.0-devel-ubuntu20.04
|
|
||||||
from python:3.11
|
|
||||||
|
|
||||||
env DEBIAN_FRONTEND=noninteractive
|
|
||||||
|
|
||||||
run apt-get update && \
|
|
||||||
apt-get install -y ffmpeg libsm6 libxext6
|
|
||||||
|
|
||||||
workdir /skynet
|
|
||||||
|
|
||||||
copy requirements.cuda* ./
|
|
||||||
|
|
||||||
run pip install -U pip ninja
|
|
||||||
run pip install -v -r requirements.cuda.0.txt
|
|
||||||
run pip install -v -r requirements.cuda.1.txt
|
|
||||||
run pip install -v -r requirements.cuda.2.txt
|
|
||||||
|
|
||||||
copy requirements.txt requirements.txt
|
|
||||||
copy pytest.ini pytest.ini
|
|
||||||
copy setup.py setup.py
|
|
||||||
copy skynet skynet
|
|
||||||
|
|
||||||
run pip install -e . -r requirements.txt
|
|
||||||
|
|
||||||
env PYTORCH_CUDA_ALLOC_CONF max_split_size_mb:128
|
|
||||||
env NVIDIA_VISIBLE_DEVICES=all
|
|
||||||
env HF_HOME /hf_home
|
|
||||||
|
|
||||||
copy tests tests
|
|
|
@ -1 +0,0 @@
|
||||||
git+https://github.com/facebookresearch/xformers.git@main#egg=xformers
|
|
|
@ -1,2 +0,0 @@
|
||||||
basicsr
|
|
||||||
realesrgan
|
|
|
@ -1,9 +1,14 @@
|
||||||
|
torch
|
||||||
scipy
|
scipy
|
||||||
triton
|
triton
|
||||||
|
xformers
|
||||||
accelerate
|
accelerate
|
||||||
transformers
|
transformers
|
||||||
huggingface_hub
|
huggingface_hub
|
||||||
diffusers[torch]>=0.18.0
|
diffusers[torch]
|
||||||
invisible-watermark
|
invisible-watermark
|
||||||
torch==1.13.0+cu117
|
|
||||||
|
basicsr
|
||||||
|
realesrgan
|
||||||
|
|
||||||
--extra-index-url https://download.pytorch.org/whl/cu117
|
--extra-index-url https://download.pytorch.org/whl/cu117
|
|
@ -5,18 +5,18 @@ VERSION = '0.1a11'
|
||||||
DOCKER_RUNTIME_CUDA = 'skynet:runtime-cuda'
|
DOCKER_RUNTIME_CUDA = 'skynet:runtime-cuda'
|
||||||
|
|
||||||
MODELS = {
|
MODELS = {
|
||||||
'prompthero/openjourney': { 'short': 'midj'},
|
'prompthero/openjourney': { 'short': 'midj', 'mem': 8 },
|
||||||
'runwayml/stable-diffusion-v1-5': { 'short': 'stable'},
|
'runwayml/stable-diffusion-v1-5': { 'short': 'stable', 'mem': 8 },
|
||||||
'stabilityai/stable-diffusion-2-1-base': { 'short': 'stable2'},
|
'stabilityai/stable-diffusion-2-1-base': { 'short': 'stable2', 'mem': 8 },
|
||||||
'snowkidy/stable-diffusion-xl-base-0.9': { 'short': 'stablexl0.9'},
|
'snowkidy/stable-diffusion-xl-base-0.9': { 'short': 'stablexl0.9', 'mem': 24 },
|
||||||
'stabilityai/stable-diffusion-xl-base-1.0': { 'short': 'stablexl'},
|
'stabilityai/stable-diffusion-xl-base-1.0': { 'short': 'stablexl', 'mem': 24 },
|
||||||
'Linaqruf/anything-v3.0': { 'short': 'hdanime'},
|
'Linaqruf/anything-v3.0': { 'short': 'hdanime', 'mem': 8 },
|
||||||
'hakurei/waifu-diffusion': { 'short': 'waifu'},
|
'hakurei/waifu-diffusion': { 'short': 'waifu', 'mem': 8 },
|
||||||
'nitrosocke/Ghibli-Diffusion': { 'short': 'ghibli'},
|
'nitrosocke/Ghibli-Diffusion': { 'short': 'ghibli', 'mem': 8 },
|
||||||
'dallinmackay/Van-Gogh-diffusion': { 'short': 'van-gogh'},
|
'dallinmackay/Van-Gogh-diffusion': { 'short': 'van-gogh', 'mem': 8 },
|
||||||
'lambdalabs/sd-pokemon-diffusers': { 'short': 'pokemon'},
|
'lambdalabs/sd-pokemon-diffusers': { 'short': 'pokemon', 'mem': 8 },
|
||||||
'Envvi/Inkpunk-Diffusion': { 'short': 'ink'},
|
'Envvi/Inkpunk-Diffusion': { 'short': 'ink', 'mem': 8 },
|
||||||
'nousr/robo-diffusion': { 'short': 'robot'}
|
'nousr/robo-diffusion': { 'short': 'robot', 'mem': 8 }
|
||||||
}
|
}
|
||||||
|
|
||||||
SHORT_NAMES = [
|
SHORT_NAMES = [
|
||||||
|
@ -153,10 +153,11 @@ DEFAULT_UPSCALER = None
|
||||||
DEFAULT_CONFIG_PATH = 'skynet.ini'
|
DEFAULT_CONFIG_PATH = 'skynet.ini'
|
||||||
|
|
||||||
DEFAULT_INITAL_MODELS = [
|
DEFAULT_INITAL_MODELS = [
|
||||||
'prompthero/openjourney',
|
'stabilityai/stable-diffusion-xl-base-1.0'
|
||||||
'runwayml/stable-diffusion-v1-5'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
DEFAULT_SINGLE_CARD_MAP = 'cuda:0'
|
||||||
|
|
||||||
DATE_FORMAT = '%B the %dth %Y, %H:%M:%S'
|
DATE_FORMAT = '%B the %dth %Y, %H:%M:%S'
|
||||||
|
|
||||||
CONFIG_ATTRS = [
|
CONFIG_ATTRS = [
|
||||||
|
|
|
@ -83,9 +83,10 @@ def open_ipfs_node(name='skynet-ipfs'):
|
||||||
remove=True
|
remove=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
uid = 1000
|
||||||
|
gid = 1000
|
||||||
|
|
||||||
if sys.platform != 'win32':
|
if sys.platform != 'win32':
|
||||||
uid = os.getuid()
|
|
||||||
gid = os.getgid()
|
|
||||||
ec, out = container.exec_run(['chown', f'{uid}:{gid}', '-R', export_target])
|
ec, out = container.exec_run(['chown', f'{uid}:{gid}', '-R', export_target])
|
||||||
logging.info(out)
|
logging.info(out)
|
||||||
assert ec == 0
|
assert ec == 0
|
||||||
|
|
|
@ -4,6 +4,7 @@ import io
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
import random
|
import random
|
||||||
|
import logging
|
||||||
|
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
@ -24,7 +25,7 @@ from diffusers import (
|
||||||
from realesrgan import RealESRGANer
|
from realesrgan import RealESRGANer
|
||||||
from huggingface_hub import login
|
from huggingface_hub import login
|
||||||
|
|
||||||
from .constants import MODELS
|
from .constants import MODELS, DEFAULT_SINGLE_CARD_MAP
|
||||||
|
|
||||||
|
|
||||||
def time_ms():
|
def time_ms():
|
||||||
|
@ -74,16 +75,24 @@ def pipeline_for(model: str, mem_fraction: float = 1.0, image=False) -> Diffusio
|
||||||
torch.backends.cudnn.benchmark = False
|
torch.backends.cudnn.benchmark = False
|
||||||
torch.use_deterministic_algorithms(True)
|
torch.use_deterministic_algorithms(True)
|
||||||
|
|
||||||
|
model_info = MODELS[model]
|
||||||
|
|
||||||
|
req_mem = model_info['mem']
|
||||||
|
mem_gb = torch.cuda.mem_get_info()[1] / (10**9)
|
||||||
|
over_mem = mem_gb < req_mem
|
||||||
|
if over_mem:
|
||||||
|
logging.warn(f'model requires {req_mem} but card has {mem_gb}, model will run slower..')
|
||||||
|
|
||||||
|
shortname = model_info['short']
|
||||||
params = {
|
params = {
|
||||||
'torch_dtype': torch.float16,
|
'torch_dtype': torch.float16,
|
||||||
'safety_checker': None
|
'safety_checker': None
|
||||||
}
|
}
|
||||||
|
|
||||||
if model == 'runwayml/stable-diffusion-v1-5':
|
if shortname == 'stable':
|
||||||
params['revision'] = 'fp16'
|
params['revision'] = 'fp16'
|
||||||
|
|
||||||
if (model == 'stabilityai/stable-diffusion-xl-base-1.0' or
|
if 'xl' in shortname:
|
||||||
model == 'snowkidy/stable-diffusion-xl-base-0.9'):
|
|
||||||
if image:
|
if image:
|
||||||
pipe_class = StableDiffusionXLImg2ImgPipeline
|
pipe_class = StableDiffusionXLImg2ImgPipeline
|
||||||
else:
|
else:
|
||||||
|
@ -100,10 +109,16 @@ def pipeline_for(model: str, mem_fraction: float = 1.0, image=False) -> Diffusio
|
||||||
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
|
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
|
||||||
pipe.scheduler.config)
|
pipe.scheduler.config)
|
||||||
|
|
||||||
|
if over_mem:
|
||||||
if not image:
|
if not image:
|
||||||
pipe.enable_vae_slicing()
|
pipe.enable_vae_slicing()
|
||||||
|
pipe.enable_vae_tiling()
|
||||||
|
|
||||||
return pipe.to('cuda')
|
pipe.enable_model_cpu_offload()
|
||||||
|
|
||||||
|
pipe.enable_xformers_memory_efficient_attention()
|
||||||
|
|
||||||
|
return pipe
|
||||||
|
|
||||||
|
|
||||||
def txt2img(
|
def txt2img(
|
||||||
|
|
|
@ -0,0 +1,113 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Ensure that all nodes in /dev/mapper correspond to mapped devices currently loaded by the device-mapper kernel driver
|
||||||
|
dmsetup mknodes
|
||||||
|
|
||||||
|
# First, make sure that cgroups are mounted correctly.
|
||||||
|
CGROUP=/sys/fs/cgroup
|
||||||
|
# Default LOG to "stdio" unless the caller already set it; LOG=file makes the
# script redirect dockerd output to /var/log/docker.log further down.
# The expansion needs the leading "$": without it, "{LOG:=stdio}" is just a
# literal word handed to ":" and LOG is never assigned.
: "${LOG:=stdio}"
|
||||||
|
|
||||||
|
[ -d $CGROUP ] ||
|
||||||
|
mkdir $CGROUP
|
||||||
|
|
||||||
|
mountpoint -q $CGROUP ||
|
||||||
|
mount -n -t tmpfs -o uid=0,gid=0,mode=0755 cgroup $CGROUP || {
|
||||||
|
echo "Could not make a tmpfs mount. Did you use --privileged?"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if [ -d /sys/kernel/security ] && ! mountpoint -q /sys/kernel/security
|
||||||
|
then
|
||||||
|
mount -t securityfs none /sys/kernel/security || {
|
||||||
|
echo "Could not mount /sys/kernel/security."
|
||||||
|
echo "AppArmor detection and --privileged mode might break."
|
||||||
|
}
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Mount the cgroup hierarchies exactly as they are in the parent system.
|
||||||
|
for SUBSYS in $(cut -d: -f2 /proc/1/cgroup)
|
||||||
|
do
|
||||||
|
[ -d $CGROUP/$SUBSYS ] || mkdir $CGROUP/$SUBSYS
|
||||||
|
mountpoint -q $CGROUP/$SUBSYS ||
|
||||||
|
mount -n -t cgroup -o $SUBSYS cgroup $CGROUP/$SUBSYS
|
||||||
|
|
||||||
|
# The two following sections address a bug which manifests itself
|
||||||
|
# by a cryptic "lxc-start: no ns_cgroup option specified" when
|
||||||
|
# trying to start containers within a container.
|
||||||
|
# The bug seems to appear when the cgroup hierarchies are not
|
||||||
|
# mounted on the exact same directories in the host, and in the
|
||||||
|
# container.
|
||||||
|
|
||||||
|
# Named, control-less cgroups are mounted with "-o name=foo"
|
||||||
|
# (and appear as such under /proc/<pid>/cgroup) but are usually
|
||||||
|
# mounted on a directory named "foo" (without the "name=" prefix).
|
||||||
|
# Systemd and OpenRC (and possibly others) both create such a
|
||||||
|
# cgroup. To avoid the aforementioned bug, we symlink "foo" to
|
||||||
|
# "name=foo". This shouldn't have any adverse effect.
|
||||||
|
echo $SUBSYS | grep -q ^name= && {
|
||||||
|
NAME=$(echo $SUBSYS | sed s/^name=//)
|
||||||
|
ln -s $SUBSYS $CGROUP/$NAME
|
||||||
|
}
|
||||||
|
|
||||||
|
# Likewise, on at least one system, it has been reported that
|
||||||
|
# systemd would mount the CPU and CPU accounting controllers
|
||||||
|
# (respectively "cpu" and "cpuacct") with "-o cpuacct,cpu"
|
||||||
|
# but on a directory called "cpu,cpuacct" (note the inversion
|
||||||
|
# in the order of the groups). This tries to work around it.
|
||||||
|
[ $SUBSYS = cpuacct,cpu ] && ln -s $SUBSYS $CGROUP/cpu,cpuacct
|
||||||
|
done
|
||||||
|
|
||||||
|
# Note: as I write these lines, the LXC userland tools cannot set up
|
||||||
|
# a "sub-container" properly if the "devices" cgroup is not in its
|
||||||
|
# own hierarchy. Let's detect this and issue a warning.
|
||||||
|
grep -q :devices: /proc/1/cgroup ||
|
||||||
|
echo "WARNING: the 'devices' cgroup should be in its own hierarchy."
|
||||||
|
grep -qw devices /proc/1/cgroup ||
|
||||||
|
echo "WARNING: it looks like the 'devices' cgroup is not mounted."
|
||||||
|
|
||||||
|
# Now, close extraneous file descriptors.
|
||||||
|
pushd /proc/self/fd >/dev/null
|
||||||
|
for FD in *
|
||||||
|
do
|
||||||
|
case "$FD" in
|
||||||
|
# Keep stdin/stdout/stderr
|
||||||
|
[012])
|
||||||
|
;;
|
||||||
|
# Nuke everything else
|
||||||
|
*)
|
||||||
|
eval exec "$FD>&-"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
popd >/dev/null
|
||||||
|
|
||||||
|
|
||||||
|
# If a pidfile is still around (for example after a container restart),
|
||||||
|
# delete it so that docker can start.
|
||||||
|
rm -rf /var/run/docker.pid
|
||||||
|
|
||||||
|
# If we were given a PORT environment variable, start as a simple daemon;
|
||||||
|
# otherwise, spawn a shell as well
|
||||||
|
if [ "$PORT" ]
|
||||||
|
then
|
||||||
|
exec dockerd -H 0.0.0.0:$PORT -H unix:///var/run/docker.sock \
|
||||||
|
$DOCKER_DAEMON_ARGS
|
||||||
|
else
|
||||||
|
if [ "$LOG" == "file" ]
|
||||||
|
then
|
||||||
|
dockerd $DOCKER_DAEMON_ARGS &>/var/log/docker.log &
|
||||||
|
else
|
||||||
|
dockerd $DOCKER_DAEMON_ARGS &
|
||||||
|
fi
|
||||||
|
(( timeout = 60 + SECONDS ))
|
||||||
|
until docker info >/dev/null 2>&1
|
||||||
|
do
|
||||||
|
if (( SECONDS >= timeout )); then
|
||||||
|
echo 'Timed out trying to connect to internal docker host.' >&2
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
[[ $1 ]] && exec "$@"
|
||||||
|
exec bash --login
|
||||||
|
fi
|
Loading…
Reference in New Issue