From 82a7a3e0764cc916ec34755e022b41bc439b8bf4 Mon Sep 17 00:00:00 2001
From: Konstantine Tsafatinos
Date: Thu, 8 Jun 2023 23:29:05 -0400
Subject: [PATCH] update txt2txt utils, create pipeline, update cli

---
 skynet/cli.py          | 11 +++++--
 skynet/dgpu/compute.py | 16 +++++----
 skynet/utils.py        | 75 +++++++++++++++++++++++++++++++-----------
 3 files changed, 75 insertions(+), 27 deletions(-)

diff --git a/skynet/cli.py b/skynet/cli.py
index 2558ca1..54ae10a 100644
--- a/skynet/cli.py
+++ b/skynet/cli.py
@@ -80,6 +80,9 @@ def img2img(model, prompt, input, output, strength, guidance, steps, seed):
 @click.option('--output', '-o', default='output.txt')
 @click.option('--temperature', '-t', default=1.0)
 @click.option('--max-length', '-ml', default=256)
+@click.option('--num-return-sequences', '-rs', default=1)
+@click.option('--no-repeat-ngram', '-nr', default=2)
+@click.option('--top-p', '-tp', default=0.95)
 def txt2txt(*args, **kwargs):
     from . import utils
     _, hf_token, _, cfg = init_env_from_config()
@@ -151,8 +154,9 @@ def enqueue(
     binary = ''
 
     ec, out = cleos.push_action(
-        'telos.gpu', 'enqueue', [account, req,
-                                 binary, reward], f'{account}@{permission}'
+        'telos.gpu', 'enqueue',
+        [account, req, binary, reward],
+        f'{account}@{permission}'
     )
     print(collect_stdout(out))
 
@@ -293,7 +297,8 @@ def config(
             'user', node_url, None, None)
     with open_cleos(node_url, key=key) as cleos:
         ec, out = cleos.push_action(
-            'telos.gpu', 'config', [token_contract, token_symbol], f'{account}@{permission}'
+            'telos.gpu', 'config', [token_contract,
+                                    token_symbol], f'{account}@{permission}'
         )
     print(collect_stdout(out))
 
diff --git a/skynet/dgpu/compute.py b/skynet/dgpu/compute.py
index 069af47..d1fa79c 100644
--- a/skynet/dgpu/compute.py
+++ b/skynet/dgpu/compute.py
@@ -12,7 +12,7 @@ import torch
 from skynet.constants import DEFAULT_INITAL_MODELS, MODELS
 from skynet.dgpu.errors import DGPUComputeError
 
-from skynet.utils import convert_from_bytes_and_crop, convert_from_cv2_to_image, convert_from_image_to_cv2, convert_from_img_to_bytes, init_upscaler, pipeline_for
+from skynet.utils import convert_from_bytes_and_crop, convert_from_cv2_to_image, convert_from_image_to_cv2, convert_from_img_to_bytes, init_upscaler, pipeline_for_image
 
 
 def prepare_params_for_diffuse(
@@ -62,7 +62,7 @@ class SkynetMM:
     def is_model_loaded(self, model_name: str, image: bool):
         for model_key, model_data in self._models.items():
             if (model_key == model_name and
-                model_data['image'] == image):
+                    model_data['image'] == image):
                 return True
 
         return False
@@ -75,7 +75,7 @@ class SkynetMM:
     ):
         logging.info(f'loading model {model_name}...')
         if force or len(self._models.keys()) == 0:
-            pipe = pipeline_for(model_name, image=image)
+            pipe = pipeline_for_image(model_name, image=image)
             self._models[model_name] = {
                 'pipe': pipe,
                 'generated': 0,
@@ -87,7 +87,7 @@ class SkynetMM:
 
             for model in self._models:
                 if self._models[
-                    least_used]['generated'] > self._models[model]['generated']:
+                        least_used]['generated'] > self._models[model]['generated']:
                     least_used = model
 
             del self._models[least_used]
@@ -97,7 +97,7 @@ class SkynetMM:
             gc.collect()
             torch.cuda.empty_cache()
 
-            pipe = pipeline_for(model_name, image=image)
+            pipe = pipeline_for_image(model_name, image=image)
 
             self._models[model_name] = {
                 'pipe': pipe,
@@ -133,7 +133,8 @@ class SkynetMM:
                 arguments = prepare_params_for_diffuse(params, binary)
                 prompt, guidance, step, seed, upscaler, extra_params = arguments
 
-                model = self.get_model(params['model'], 'image' in extra_params)
+                model = self.get_model(
+                    params['model'],
+                    'image' in extra_params)
 
                 image = model(
                     prompt,
@@ -155,6 +156,9 @@ class SkynetMM:
 
                 return img_sha, img_raw
 
+            case 'transformer':
+                # TODO: Understand dgpu code and figure out what to put here
+                pass
             case _:
                 raise DGPUComputeError('Unsupported compute method')
 
diff --git a/skynet/utils.py b/skynet/utils.py
index 7fc2d7d..87fcb38 100644
--- a/skynet/utils.py
+++ b/skynet/utils.py
@@ -19,7 +19,7 @@ from diffusers import (
     StableDiffusionImg2ImgPipeline,
     EulerAncestralDiscreteScheduler
 )
-from transformers import pipeline, Conversation
+from transformers import AutoTokenizer, AutoModelForCausalLM, PreTrainedModel
 from realesrgan import RealESRGANer
 
 from huggingface_hub import login
@@ -59,7 +59,7 @@ def convert_from_bytes_and_crop(raw: bytes, max_w: int, max_h: int) -> Image:
     return image.convert('RGB')
 
 
-def pipeline_for(model: str, mem_fraction: float = 1.0, image=False) -> DiffusionPipeline:
+def pipeline_for_image(model: str, mem_fraction: float = 1.0, image=False) -> DiffusionPipeline:
     assert torch.cuda.is_available()
     torch.cuda.empty_cache()
     torch.cuda.set_per_process_memory_fraction(mem_fraction)
@@ -98,6 +98,41 @@ def pipeline_for(model: str, mem_fraction: float = 1.0, image=False) -> Diffusio
     return pipe.to('cuda')
 
 
+def pipeline_for_text(model: str, mem_fraction: float = 1.0, image=False) -> PreTrainedModel:
+    assert torch.cuda.is_available()
+    torch.cuda.empty_cache()
+    torch.cuda.set_per_process_memory_fraction(mem_fraction)
+    torch.backends.cuda.matmul.allow_tf32 = True
+    torch.backends.cudnn.allow_tf32 = True
+
+    # NOTE: image could be used for image to text
+    # NOTE: not sure if this is necessary or what it does exactly
+    # full determinism
+    # https://huggingface.co/docs/diffusers/using-diffusers/reproducibility#deterministic-algorithms
+    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"
+
+    torch.backends.cudnn.benchmark = False
+    torch.use_deterministic_algorithms(True)
+
+    params = {
+        # NOTE: 'safety_checker' is a diffusers-only kwarg, not valid here
+        'torch_dtype': torch.float16
+    }
+
+    pipe = AutoModelForCausalLM.from_pretrained(
+        model, **params
+    )
+
+    # TODO: check whether a scheduler is needed here and what this code does
+    # pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
+    #     pipe.scheduler.config)
+    #
+    # if not image:
+    #     pipe.enable_vae_slicing()
+    #
+    return pipe.to('cuda')
+
+
 def txt2img(
     hf_token: str,
     model: str = 'prompthero/openjourney',
@@ -115,7 +150,7 @@ def txt2img(
     torch.backends.cudnn.allow_tf32 = True
 
     login(token=hf_token)
-    pipe = pipeline_for(model)
+    pipe = pipeline_for_image(model)
 
     seed = seed if seed else random.randint(0, 2 ** 64)
     prompt = prompt
@@ -148,7 +183,7 @@ def img2img(
     torch.backends.cudnn.allow_tf32 = True
 
     login(token=hf_token)
-    pipe = pipeline_for(model, image=True)
+    pipe = pipeline_for_image(model, image=True)
 
     with open(img_path, 'rb') as img_file:
         input_img = convert_from_bytes_and_crop(img_file.read(), 512, 512)
@@ -168,11 +203,12 @@ def img2img(
 
 def txt2txt(
     hf_token: str,
-    # TODO: change this to actual model ref
-    # add more granular control of models
-    model: str = 'microsoft/DialoGPT-small',
+    model: str = 'tiiuae/falcon-40b-instruct',
     prompt: str = 'a red old tractor in a sunny wheat field',
     output: str = 'output.txt',
+    num_return_sequences: int = 1,
+    no_repeat_ngram_size: int = 2,
+    top_p: float = 0.95,
     temperature: float = 1.0,
     max_length: int = 256,
 ):
@@ -183,24 +219,27 @@ def txt2txt(
     torch.backends.cudnn.allow_tf32 = True
 
     login(token=hf_token)
-    chatbot = pipeline('text-generation', model=model, device_map='auto')
+    tokenizer = AutoTokenizer.from_pretrained(model)
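+    # NOTE: pipeline_for_text loads the causal LM and moves it to CUDA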
+    pipe = pipeline_for_text(model)
 
     prompt = prompt
-    conversation = Conversation(prompt)
-    conversation = chatbot(
-        conversation,
+    # TODO: learn more about return tensors and model params
+    tokenized_input = tokenizer.encode(prompt, return_tensors='pt').to('cuda')
+    tokenized_output = pipe.generate(
+        tokenized_input,
         max_length=max_length,
         do_sample=True,
+        num_return_sequences=num_return_sequences,
+        no_repeat_ngram_size=no_repeat_ngram_size,
+        top_p=top_p,
         temperature=temperature
     )
-    response = conversation.generated_responses[-1]
+    response = tokenizer.decode(tokenized_output[0], skip_special_tokens=True)
 
     with open(output, 'w', encoding='utf-8') as f:
         f.write(response)
 
-    # This if for continued conversatin, need to figure out how to store convo
-    # conversation.add_user_input("Is it an action movie?")
-    # conversation = chatbot(conversation)
-    # conversation.generated_responses[-1]
+    # TODO: figure out continued conversation, store data on frontend?
 
 def init_upscaler(model_path: str = 'weights/RealESRGAN_x4plus.pth'):
@@ -249,6 +288,6 @@ def download_all_models(hf_token: str):
     login(token=hf_token)
     for model in MODELS:
         print(f'DOWNLOADING {model.upper()}')
-        pipeline_for(model)
+        pipeline_for_image(model)
         print(f'DOWNLOADING IMAGE {model.upper()}')
-        pipeline_for(model, image=True)
+        pipeline_for_image(model, image=True)
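
---

Reviewer note: below is a minimal, self-contained sketch of the txt2txt flow this patch introduces (tokenize, generate, decode), handy for exercising the new generation parameters locally. It assumes a CUDA device and uses 'gpt2' as a hypothetical small stand-in for the patched default 'tiiuae/falcon-40b-instruct'; the keyword values mirror the new CLI defaults. Note that do_sample=True is required for top_p and temperature to take effect.

    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    model_name = 'gpt2'  # hypothetical stand-in for 'tiiuae/falcon-40b-instruct'
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name).to('cuda')

    # tokenize the prompt and move it to the same device as the model
    inputs = tokenizer.encode(
        'a red old tractor in a sunny wheat field',
        return_tensors='pt').to('cuda')

    outputs = model.generate(
        inputs,
        max_length=256,
        do_sample=True,            # without this, top_p/temperature are ignored
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        top_p=0.95,
        temperature=1.0,
    )

    # decode the first returned sequence, as txt2txt does
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))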