mirror of https://github.com/skygpu/skynet.git
Enable certain memory optimization options when trying to load a large model on a low-end card
parent
ffcf9dc905
commit
8a5e32d452
|
@ -0,0 +1,36 @@
|
|||
from pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel
|
||||
env DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
run apt-get update -qq && apt-get install -qqy \
|
||||
apt-transport-https \
|
||||
ca-certificates \
|
||||
curl \
|
||||
git \
|
||||
lxc \
|
||||
vim \
|
||||
ffmpeg \
|
||||
libsm6 \
|
||||
libxext6 \
|
||||
iptables
|
||||
|
||||
run curl -sSL https://get.docker.com/ | sh
|
||||
|
||||
add ./wrapdocker /usr/local/bin/wrapdocker
|
||||
run chmod +x /usr/local/bin/wrapdocker
|
||||
|
||||
volume /var/lib/docker
|
||||
|
||||
env HF_HOME hf_home
|
||||
|
||||
workdir /root/target
|
||||
|
||||
add ./requirements.cuda.txt requirements.cuda.txt
|
||||
add ./requirements.txt requirements.txt
|
||||
add ./setup.py setup.py
|
||||
add ./skynet skynet
|
||||
|
||||
run pip install -r requirements.cuda.txt
|
||||
run pip install -r requirements.txt
|
||||
run pip install -e .
|
||||
|
||||
cmd ["wrapdocker"]
|
|
@ -1,16 +0,0 @@
|
|||
from python:3.10.0
|
||||
|
||||
env DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
workdir /skynet
|
||||
|
||||
copy requirements.txt requirements.txt
|
||||
copy pytest.ini ./
|
||||
copy setup.py ./
|
||||
copy skynet ./skynet
|
||||
|
||||
run pip install \
|
||||
-e . \
|
||||
-r requirements.txt
|
||||
|
||||
copy tests ./
|
|
@ -1,29 +0,0 @@
|
|||
from nvidia/cuda:11.7.0-devel-ubuntu20.04
|
||||
from python:3.11
|
||||
|
||||
env DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
run apt-get update && \
|
||||
apt-get install -y ffmpeg libsm6 libxext6
|
||||
|
||||
workdir /skynet
|
||||
|
||||
copy requirements.cuda* ./
|
||||
|
||||
run pip install -U pip ninja
|
||||
run pip install -v -r requirements.cuda.0.txt
|
||||
run pip install -v -r requirements.cuda.1.txt
|
||||
run pip install -v -r requirements.cuda.2.txt
|
||||
|
||||
copy requirements.txt requirements.txt
|
||||
copy pytest.ini pytest.ini
|
||||
copy setup.py setup.py
|
||||
copy skynet skynet
|
||||
|
||||
run pip install -e . -r requirements.txt
|
||||
|
||||
env PYTORCH_CUDA_ALLOC_CONF max_split_size_mb:128
|
||||
env NVIDIA_VISIBLE_DEVICES=all
|
||||
env HF_HOME /hf_home
|
||||
|
||||
copy tests tests
|
|
@ -1 +0,0 @@
|
|||
git+https://github.com/facebookresearch/xformers.git@main#egg=xformers
|
|
@ -1,2 +0,0 @@
|
|||
basicsr
|
||||
realesrgan
|
|
@ -1,9 +1,14 @@
|
|||
torch
|
||||
scipy
|
||||
triton
|
||||
xformers
|
||||
accelerate
|
||||
transformers
|
||||
huggingface_hub
|
||||
diffusers[torch]>=0.18.0
|
||||
diffusers[torch]
|
||||
invisible-watermark
|
||||
torch==1.13.0+cu117
|
||||
|
||||
basicsr
|
||||
realesrgan
|
||||
|
||||
--extra-index-url https://download.pytorch.org/whl/cu117
|
|
@ -5,18 +5,18 @@ VERSION = '0.1a11'
|
|||
DOCKER_RUNTIME_CUDA = 'skynet:runtime-cuda'
|
||||
|
||||
MODELS = {
|
||||
'prompthero/openjourney': { 'short': 'midj'},
|
||||
'runwayml/stable-diffusion-v1-5': { 'short': 'stable'},
|
||||
'stabilityai/stable-diffusion-2-1-base': { 'short': 'stable2'},
|
||||
'snowkidy/stable-diffusion-xl-base-0.9': { 'short': 'stablexl0.9'},
|
||||
'stabilityai/stable-diffusion-xl-base-1.0': { 'short': 'stablexl'},
|
||||
'Linaqruf/anything-v3.0': { 'short': 'hdanime'},
|
||||
'hakurei/waifu-diffusion': { 'short': 'waifu'},
|
||||
'nitrosocke/Ghibli-Diffusion': { 'short': 'ghibli'},
|
||||
'dallinmackay/Van-Gogh-diffusion': { 'short': 'van-gogh'},
|
||||
'lambdalabs/sd-pokemon-diffusers': { 'short': 'pokemon'},
|
||||
'Envvi/Inkpunk-Diffusion': { 'short': 'ink'},
|
||||
'nousr/robo-diffusion': { 'short': 'robot'}
|
||||
'prompthero/openjourney': { 'short': 'midj', 'mem': 8 },
|
||||
'runwayml/stable-diffusion-v1-5': { 'short': 'stable', 'mem': 8 },
|
||||
'stabilityai/stable-diffusion-2-1-base': { 'short': 'stable2', 'mem': 8 },
|
||||
'snowkidy/stable-diffusion-xl-base-0.9': { 'short': 'stablexl0.9', 'mem': 24 },
|
||||
'stabilityai/stable-diffusion-xl-base-1.0': { 'short': 'stablexl', 'mem': 24 },
|
||||
'Linaqruf/anything-v3.0': { 'short': 'hdanime', 'mem': 8 },
|
||||
'hakurei/waifu-diffusion': { 'short': 'waifu', 'mem': 8 },
|
||||
'nitrosocke/Ghibli-Diffusion': { 'short': 'ghibli', 'mem': 8 },
|
||||
'dallinmackay/Van-Gogh-diffusion': { 'short': 'van-gogh', 'mem': 8 },
|
||||
'lambdalabs/sd-pokemon-diffusers': { 'short': 'pokemon', 'mem': 8 },
|
||||
'Envvi/Inkpunk-Diffusion': { 'short': 'ink', 'mem': 8 },
|
||||
'nousr/robo-diffusion': { 'short': 'robot', 'mem': 8 }
|
||||
}
|
||||
|
||||
SHORT_NAMES = [
|
||||
|
@ -153,10 +153,11 @@ DEFAULT_UPSCALER = None
|
|||
DEFAULT_CONFIG_PATH = 'skynet.ini'
|
||||
|
||||
DEFAULT_INITAL_MODELS = [
|
||||
'prompthero/openjourney',
|
||||
'runwayml/stable-diffusion-v1-5'
|
||||
'stabilityai/stable-diffusion-xl-base-1.0'
|
||||
]
|
||||
|
||||
DEFAULT_SINGLE_CARD_MAP = 'cuda:0'
|
||||
|
||||
DATE_FORMAT = '%B the %dth %Y, %H:%M:%S'
|
||||
|
||||
CONFIG_ATTRS = [
|
||||
|
|
|
@ -83,9 +83,10 @@ def open_ipfs_node(name='skynet-ipfs'):
|
|||
remove=True
|
||||
)
|
||||
|
||||
uid = 1000
|
||||
gid = 1000
|
||||
|
||||
if sys.platform != 'win32':
|
||||
uid = os.getuid()
|
||||
gid = os.getgid()
|
||||
ec, out = container.exec_run(['chown', f'{uid}:{gid}', '-R', export_target])
|
||||
logging.info(out)
|
||||
assert ec == 0
|
||||
|
|
|
@ -4,6 +4,7 @@ import io
|
|||
import os
|
||||
import time
|
||||
import random
|
||||
import logging
|
||||
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
|
@ -24,7 +25,7 @@ from diffusers import (
|
|||
from realesrgan import RealESRGANer
|
||||
from huggingface_hub import login
|
||||
|
||||
from .constants import MODELS
|
||||
from .constants import MODELS, DEFAULT_SINGLE_CARD_MAP
|
||||
|
||||
|
||||
def time_ms():
|
||||
|
@ -74,16 +75,24 @@ def pipeline_for(model: str, mem_fraction: float = 1.0, image=False) -> Diffusio
|
|||
torch.backends.cudnn.benchmark = False
|
||||
torch.use_deterministic_algorithms(True)
|
||||
|
||||
model_info = MODELS[model]
|
||||
|
||||
req_mem = model_info['mem']
|
||||
mem_gb = torch.cuda.mem_get_info()[1] / (10**9)
|
||||
over_mem = mem_gb < req_mem
|
||||
if over_mem:
|
||||
logging.warn(f'model requires {req_mem} but card has {mem_gb}, model will run slower..')
|
||||
|
||||
shortname = model_info['short']
|
||||
params = {
|
||||
'torch_dtype': torch.float16,
|
||||
'safety_checker': None
|
||||
}
|
||||
|
||||
if model == 'runwayml/stable-diffusion-v1-5':
|
||||
if shortname == 'stable':
|
||||
params['revision'] = 'fp16'
|
||||
|
||||
if (model == 'stabilityai/stable-diffusion-xl-base-1.0' or
|
||||
model == 'snowkidy/stable-diffusion-xl-base-0.9'):
|
||||
if 'xl' in shortname:
|
||||
if image:
|
||||
pipe_class = StableDiffusionXLImg2ImgPipeline
|
||||
else:
|
||||
|
@ -100,10 +109,16 @@ def pipeline_for(model: str, mem_fraction: float = 1.0, image=False) -> Diffusio
|
|||
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
|
||||
pipe.scheduler.config)
|
||||
|
||||
if over_mem:
|
||||
if not image:
|
||||
pipe.enable_vae_slicing()
|
||||
pipe.enable_vae_tiling()
|
||||
|
||||
return pipe.to('cuda')
|
||||
pipe.enable_model_cpu_offload()
|
||||
|
||||
pipe.enable_xformers_memory_efficient_attention()
|
||||
|
||||
return pipe
|
||||
|
||||
|
||||
def txt2img(
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Ensure that all nodes in /dev/mapper correspond to mapped devices currently loaded by the device-mapper kernel driver
|
||||
dmsetup mknodes
|
||||
|
||||
# First, make sure that cgroups are mounted correctly.
|
||||
CGROUP=/sys/fs/cgroup
|
||||
# Default LOG to "stdio" when the caller did not set it. The ":" no-op builtin
# is used only so that the ${VAR:=default} expansion is evaluated; without the
# leading "$" the braces are a literal argument and LOG is never assigned.
: ${LOG:=stdio}
|
||||
|
||||
[ -d $CGROUP ] ||
|
||||
mkdir $CGROUP
|
||||
|
||||
mountpoint -q $CGROUP ||
|
||||
mount -n -t tmpfs -o uid=0,gid=0,mode=0755 cgroup $CGROUP || {
|
||||
echo "Could not make a tmpfs mount. Did you use --privileged?"
|
||||
exit 1
|
||||
}
|
||||
|
||||
if [ -d /sys/kernel/security ] && ! mountpoint -q /sys/kernel/security
|
||||
then
|
||||
mount -t securityfs none /sys/kernel/security || {
|
||||
echo "Could not mount /sys/kernel/security."
|
||||
echo "AppArmor detection and --privileged mode might break."
|
||||
}
|
||||
fi
|
||||
|
||||
# Mount the cgroup hierarchies exactly as they are in the parent system.
|
||||
for SUBSYS in $(cut -d: -f2 /proc/1/cgroup)
|
||||
do
|
||||
[ -d $CGROUP/$SUBSYS ] || mkdir $CGROUP/$SUBSYS
|
||||
mountpoint -q $CGROUP/$SUBSYS ||
|
||||
mount -n -t cgroup -o $SUBSYS cgroup $CGROUP/$SUBSYS
|
||||
|
||||
# The two following sections address a bug which manifests itself
|
||||
# by a cryptic "lxc-start: no ns_cgroup option specified" when
|
||||
# trying to start containers within a container.
|
||||
# The bug seems to appear when the cgroup hierarchies are not
|
||||
# mounted on the exact same directories in the host, and in the
|
||||
# container.
|
||||
|
||||
# Named, control-less cgroups are mounted with "-o name=foo"
|
||||
# (and appear as such under /proc/<pid>/cgroup) but are usually
|
||||
# mounted on a directory named "foo" (without the "name=" prefix).
|
||||
# Systemd and OpenRC (and possibly others) both create such a
|
||||
# cgroup. To avoid the aforementioned bug, we symlink "foo" to
|
||||
# "name=foo". This shouldn't have any adverse effect.
|
||||
echo $SUBSYS | grep -q ^name= && {
|
||||
NAME=$(echo $SUBSYS | sed s/^name=//)
|
||||
ln -s $SUBSYS $CGROUP/$NAME
|
||||
}
|
||||
|
||||
# Likewise, on at least one system, it has been reported that
|
||||
# systemd would mount the CPU and CPU accounting controllers
|
||||
# (respectively "cpu" and "cpuacct") with "-o cpuacct,cpu"
|
||||
# but on a directory called "cpu,cpuacct" (note the inversion
|
||||
# in the order of the groups). This tries to work around it.
|
||||
[ $SUBSYS = cpuacct,cpu ] && ln -s $SUBSYS $CGROUP/cpu,cpuacct
|
||||
done
|
||||
|
||||
# Note: as I write those lines, the LXC userland tools cannot setup
|
||||
# a "sub-container" properly if the "devices" cgroup is not in its
|
||||
# own hierarchy. Let's detect this and issue a warning.
|
||||
grep -q :devices: /proc/1/cgroup ||
|
||||
echo "WARNING: the 'devices' cgroup should be in its own hierarchy."
|
||||
grep -qw devices /proc/1/cgroup ||
|
||||
echo "WARNING: it looks like the 'devices' cgroup is not mounted."
|
||||
|
||||
# Now, close extraneous file descriptors.
|
||||
pushd /proc/self/fd >/dev/null
|
||||
for FD in *
|
||||
do
|
||||
case "$FD" in
|
||||
# Keep stdin/stdout/stderr
|
||||
[012])
|
||||
;;
|
||||
# Nuke everything else
|
||||
*)
|
||||
eval exec "$FD>&-"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
popd >/dev/null
|
||||
|
||||
|
||||
# If a pidfile is still around (for example after a container restart),
|
||||
# delete it so that docker can start.
|
||||
rm -rf /var/run/docker.pid
|
||||
|
||||
# If we were given a PORT environment variable, start as a simple daemon;
|
||||
# otherwise, spawn a shell as well
|
||||
if [ "$PORT" ]
|
||||
then
|
||||
exec dockerd -H 0.0.0.0:$PORT -H unix:///var/run/docker.sock \
|
||||
$DOCKER_DAEMON_ARGS
|
||||
else
|
||||
if [ "$LOG" == "file" ]
|
||||
then
|
||||
dockerd $DOCKER_DAEMON_ARGS &>/var/log/docker.log &
|
||||
else
|
||||
dockerd $DOCKER_DAEMON_ARGS &
|
||||
fi
|
||||
(( timeout = 60 + SECONDS ))
|
||||
until docker info >/dev/null 2>&1
|
||||
do
|
||||
if (( SECONDS >= timeout )); then
|
||||
echo 'Timed out trying to connect to internal docker host.' >&2
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
[[ $1 ]] && exec "$@"
|
||||
exec bash --login
|
||||
fi
|
Loading…
Reference in New Issue