update txt2txt utils, create pipeline, update cli

2023-06-08 23:29:05 -04:00 · 2023-06-08 23:29:05 -04:00 · 82a7a3e076
parent f66550b3b9
commit 82a7a3e076
3 changed files with 74 additions and 28 deletions
--- a/skynet/cli.py
+++ b/skynet/cli.py
@ -80,6 +80,9 @@ def img2img(model, prompt, input, output, strength, guidance, steps, seed):
@click.option('--output', '-o', default='output.txt')
@click.option('--temperature', '-t', default=1.0)
@click.option('--max-length', '-ml', default=256)
+@click.option('--num-return-sequences', '-rs', default=1)
+@click.option('--no-repeat-ngram', '-nr', default=2)
+@click.option('--top-p', '-tp', default=0.95)
 def txt2txt(*args, **kwargs):
    from . import utils
    _, hf_token, _, cfg = init_env_from_config()
@ -151,8 +154,9 @@ def enqueue(
        binary = ''

        ec, out = cleos.push_action(
-            'telos.gpu', 'enqueue', [account, req,
-                                     binary, reward], f'{account}@{permission}'
+            'telos.gpu', 'enqueue',
+            [account, req, binary, reward],
+            f'{account}@{permission}'
        )

        print(collect_stdout(out))
@ -293,7 +297,8 @@ def config(
        'user', node_url, None, None)
    with open_cleos(node_url, key=key) as cleos:
        ec, out = cleos.push_action(
-            'telos.gpu', 'config', [token_contract, token_symbol], f'{account}@{permission}'
+            'telos.gpu', 'config', [token_contract,
+                                    token_symbol], f'{account}@{permission}'
        )

        print(collect_stdout(out))
--- a/skynet/dgpu/compute.py
+++ b/skynet/dgpu/compute.py
@ -12,7 +12,7 @@ import torch
 from skynet.constants import DEFAULT_INITAL_MODELS, MODELS
 from skynet.dgpu.errors import DGPUComputeError

-from skynet.utils import convert_from_bytes_and_crop, convert_from_cv2_to_image, convert_from_image_to_cv2, convert_from_img_to_bytes, init_upscaler, pipeline_for
+from skynet.utils import convert_from_bytes_and_crop, convert_from_cv2_to_image, convert_from_image_to_cv2, convert_from_img_to_bytes, init_upscaler, pipeline_for_image


 def prepare_params_for_diffuse(
@ -62,7 +62,7 @@ class SkynetMM:
    def is_model_loaded(self, model_name: str, image: bool):
+        '''Return True when ``self._models`` holds ``model_name`` with the same ``image`` flag.'''
        for model_key, model_data in self._models.items():
            if (model_key == model_name and
-                model_data['image'] == image):
+                    model_data['image'] == image):
                return True

        return False
@ -75,7 +75,7 @@ class SkynetMM:
    ):
        logging.info(f'loading model {model_name}...')
        if force or len(self._models.keys()) == 0:
-            pipe = pipeline_for(model_name, image=image)
+            pipe = pipeline_for_image(model_name, image=image)
            self._models[model_name] = {
                'pipe': pipe,
                'generated': 0,
@ -87,7 +87,7 @@ class SkynetMM:

            for model in self._models:
                if self._models[
-                    least_used]['generated'] > self._models[model]['generated']:
+                        least_used]['generated'] > self._models[model]['generated']:
                    least_used = model

            del self._models[least_used]
@ -97,7 +97,7 @@ class SkynetMM:
            gc.collect()
            torch.cuda.empty_cache()

-            pipe = pipeline_for(model_name, image=image)
+            pipe = pipeline_for_image(model_name, image=image)

            self._models[model_name] = {
                'pipe': pipe,
@ -133,7 +133,8 @@ class SkynetMM:

                    arguments = prepare_params_for_diffuse(params, binary)
                    prompt, guidance, step, seed, upscaler, extra_params = arguments
-                    model = self.get_model(params['model'], 'image' in extra_params)
+                    model = self.get_model(
+                        params['model'], 'image' in extra_params)

                    image = model(
                        prompt,
@ -155,6 +156,9 @@ class SkynetMM:

                    return img_sha, img_raw

+                case 'transformer':
+                    # TODO: Understand dgpu code and figure out what to put here
+                    pass
                case _:
                    raise DGPUComputeError('Unsupported compute method')

--- a/skynet/utils.py
+++ b/skynet/utils.py
@ -19,7 +19,7 @@ from diffusers import (
    StableDiffusionImg2ImgPipeline,
    EulerAncestralDiscreteScheduler
 )
-from transformers import pipeline, Conversation
+from transformers import AutoTokenizer, AutoModelForCausalLM
 from realesrgan import RealESRGANer
 from huggingface_hub import login

@ -59,7 +59,7 @@ def convert_from_bytes_and_crop(raw: bytes, max_w: int, max_h: int) -> Image:
    return image.convert('RGB')


-def pipeline_for(model: str, mem_fraction: float = 1.0, image=False) -> DiffusionPipeline:
+def pipeline_for_image(model: str, mem_fraction: float = 1.0, image=False) -> DiffusionPipeline:
    assert torch.cuda.is_available()
    torch.cuda.empty_cache()
    torch.cuda.set_per_process_memory_fraction(mem_fraction)
@ -98,6 +98,41 @@ def pipeline_for(model: str, mem_fraction: float = 1.0, image=False) -> Diffusio
    return pipe.to('cuda')


+def pipeline_for_text(model: str, mem_fraction: float = 1.0, image=False):
+    '''Load a causal language model for text generation and move it to CUDA.
+
+    :param model: model id or path for ``AutoModelForCausalLM.from_pretrained``.
+    :param mem_fraction: per-process CUDA memory fraction cap.
+    :param image: unused; kept so the signature mirrors
+        ``pipeline_for_image`` (could later select image-to-text).
+    :returns: the loaded model (float16) on the ``cuda`` device; this is a
+        transformers model, not a diffusers ``DiffusionPipeline``.
+    :raises AssertionError: when no CUDA device is available.
+    '''
+    assert torch.cuda.is_available()
+    torch.cuda.empty_cache()
+    torch.cuda.set_per_process_memory_fraction(mem_fraction)
+    torch.backends.cuda.matmul.allow_tf32 = True
+    torch.backends.cudnn.allow_tf32 = True
+
+    # Same determinism setup as the diffusers reproducibility guide:
+    # https://huggingface.co/docs/diffusers/using-diffusers/reproducibility#deterministic-algorithms
+    # NOTE: not sure this is necessary for text models.
+    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"
+
+    torch.backends.cudnn.benchmark = False
+    torch.use_deterministic_algorithms(True)
+
+    # ``safety_checker`` (and the scheduler / VAE-slicing setup used for images)
+    # are diffusers-only options, so none of them apply to a causal LM.
+    pipe = AutoModelForCausalLM.from_pretrained(
+        model,
+        torch_dtype=torch.float16,
+    )
+
+    return pipe.to('cuda')
+
+
 def txt2img(
    hf_token: str,
    model: str = 'prompthero/openjourney',
@ -115,7 +150,7 @@ def txt2img(
    torch.backends.cudnn.allow_tf32 = True

    login(token=hf_token)
-    pipe = pipeline_for(model)
+    pipe = pipeline_for_image(model)

    seed = seed if seed else random.randint(0, 2 ** 64)
    prompt = prompt
@ -148,7 +183,7 @@ def img2img(
    torch.backends.cudnn.allow_tf32 = True

    login(token=hf_token)
-    pipe = pipeline_for(model, image=True)
+    pipe = pipeline_for_image(model, image=True)

    with open(img_path, 'rb') as img_file:
        input_img = convert_from_bytes_and_crop(img_file.read(), 512, 512)
@ -168,11 +203,12 @@ def img2img(

 def txt2txt(
    hf_token: str,
-    # TODO: change this to actual model ref
-    #       add more granular control of models
-    model: str = 'microsoft/DialoGPT-small',
+    model: str = 'tiiuae/falcon-40b-instruct',
    prompt: str = 'a red old tractor in a sunny wheat field',
    output: str = 'output.txt',
+    num_return_sequences: int = 1,
+    no_repeat_ngram_size: int = 2,
+    top_p: float = 0.95,
    temperature: float = 1.0,
    max_length: int = 256,
 ):
+    '''Generate text for ``prompt`` with a causal LM and write it to ``output``.
+
+    :param hf_token: token handed to ``huggingface_hub.login``.
+    :param model: model id used for both the tokenizer and the model.
+    :param prompt: text that is tokenized and fed to ``generate``.
+    :param output: path of the UTF-8 text file the result is written to.
+    :param num_return_sequences: number of sequences to generate; they
+        are written newline-separated.
+    :param no_repeat_ngram_size: forwarded to ``generate``.
+    :param top_p: forwarded to ``generate``.
+    :param temperature: forwarded to ``generate``.
+    :param max_length: forwarded to ``generate``.
+    '''
@ -183,24 +219,25 @@ def txt2txt(
    torch.backends.cudnn.allow_tf32 = True

    login(token=hf_token)
-    chatbot = pipeline('text-generation', model=model, device_map='auto')
+    tokenizer = AutoTokenizer.from_pretrained(model)
+    pipe = pipeline_for_text(model)

    prompt = prompt
-    conversation = Conversation(prompt)
-    conversation = chatbot(
-        conversation,
+    # TODO: learn more about return tensors and model params
+    # The model lives on the GPU, so the input ids must be moved there too.
+    tokenized_input = tokenizer.encode(
+        prompt, return_tensors='pt').to(pipe.device)
+    # do_sample=True is kept: top_p / temperature only take effect when
+    # sampling, and num_return_sequences > 1 is rejected for greedy search.
+    tokenized_output = pipe.generate(
+        tokenized_input,
        max_length=max_length,
-        do_sample=True,
+        num_return_sequences=num_return_sequences,
+        no_repeat_ngram_size=no_repeat_ngram_size,
+        top_p=top_p,
+        do_sample=True,
        temperature=temperature
    )
-    response = conversation.generated_responses[-1]
+    # generate() returns a batch (one row per returned sequence), so decode
+    # every row and separate multiple sequences with newlines.
+    response = '\n'.join(
+        tokenizer.batch_decode(tokenized_output, skip_special_tokens=True))
    with open(output, 'w', encoding='utf-8') as f:
        f.write(response)

-    # This if for continued conversatin, need to figure out how to store convo
-    # conversation.add_user_input("Is it an action movie?")
-    # conversation = chatbot(conversation)
-    # conversation.generated_responses[-1]
+    # TODO: figure out continued conversation, store data on frontend?


 def init_upscaler(model_path: str = 'weights/RealESRGAN_x4plus.pth'):
@ -249,6 +286,6 @@ def download_all_models(hf_token: str):
    login(token=hf_token)
    for model in MODELS:
        print(f'DOWNLOADING {model.upper()}')
-        pipeline_for(model)
+        pipeline_for_image(model)
        print(f'DOWNLOADING IMAGE {model.upper()}')
-        pipeline_for(model, image=True)
+        pipeline_for_image(model, image=True)