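"""
Helpers for loading ONNX diffusion pipelines: scheduler selection, latent
generation, pipeline optimizations, and optional Textual Inversion and LoRA
blending, with caching on the server context.
"""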
from logging import getLogger
from os import path
from re import compile
from typing import Any, List, Optional, Tuple

import numpy as np
from diffusers import (
    DDIMScheduler,
    DDPMScheduler,
    DiffusionPipeline,
    DPMSolverMultistepScheduler,
    DPMSolverSinglestepScheduler,
    EulerAncestralDiscreteScheduler,
    EulerDiscreteScheduler,
    HeunDiscreteScheduler,
    IPNDMScheduler,
    KarrasVeScheduler,
    KDPM2AncestralDiscreteScheduler,
    KDPM2DiscreteScheduler,
    LMSDiscreteScheduler,
    OnnxRuntimeModel,
    PNDMScheduler,
    StableDiffusionPipeline,
)
from onnxruntime import SessionOptions
from transformers import CLIPTokenizer

from onnx_web.diffusers.utils import expand_prompt

try:
    from diffusers import DEISMultistepScheduler
except ImportError:
    from ..diffusers.stub_scheduler import StubScheduler as DEISMultistepScheduler

try:
    from diffusers import UniPCMultistepScheduler
except ImportError:
    from ..diffusers.stub_scheduler import StubScheduler as UniPCMultistepScheduler

from ..convert.diffusion.lora import merge_lora, buffer_external_data_tensors
from ..params import DeviceParams, Size
from ..server import ServerContext
from ..utils import run_gc

logger = getLogger(__name__)

latent_channels = 4
latent_factor = 8
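
# map of user-facing scheduler names to the diffusers scheduler classes they select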
pipeline_schedulers = {
    "ddim": DDIMScheduler,
    "ddpm": DDPMScheduler,
    "deis-multi": DEISMultistepScheduler,
    "dpm-multi": DPMSolverMultistepScheduler,
    "dpm-single": DPMSolverSinglestepScheduler,
    "euler": EulerDiscreteScheduler,
    "euler-a": EulerAncestralDiscreteScheduler,
    "heun": HeunDiscreteScheduler,
    "ipndm": IPNDMScheduler,
    "k-dpm-2-a": KDPM2AncestralDiscreteScheduler,
    "k-dpm-2": KDPM2DiscreteScheduler,
    "karras-ve": KarrasVeScheduler,
    "lms-discrete": LMSDiscreteScheduler,
    "pndm": PNDMScheduler,
    "unipc-multi": UniPCMultistepScheduler,
}


def get_pipeline_schedulers():
    return pipeline_schedulers


def get_scheduler_name(scheduler: Any) -> Optional[str]:
    for k, v in pipeline_schedulers.items():
        if scheduler == v or scheduler == v.__name__:
            return k

    return None


def get_latents_from_seed(seed: int, size: Size, batch: int = 1) -> np.ndarray:
    """
    From https://www.travelneil.com/stable-diffusion-updates.html.
    This one needs to use np.random because of the return type.
    """
    latents_shape = (
        batch,
        latent_channels,
        size.height // latent_factor,
        size.width // latent_factor,
    )
    rng = np.random.default_rng(seed)
    image_latents = rng.standard_normal(latents_shape).astype(np.float32)
    return image_latents


def get_tile_latents(
    full_latents: np.ndarray, dims: Tuple[int, int, int]
) -> np.ndarray:
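    # dims are given in pixels, so scale them down to latent-space coordinates before slicing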
    x, y, tile = dims
    t = tile // latent_factor
    x = x // latent_factor
    y = y // latent_factor
    xt = x + t
    yt = y + t

    return full_latents[:, :, y:yt, x:xt]


def optimize_pipeline(
    server: ServerContext,
    pipe: StableDiffusionPipeline,
) -> None:
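    # each optimization is opt-in via server.optimizations; failures are logged as warnings rather than raised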
if "diffusers-attention-slicing" in server.optimizations:
logger.debug("enabling attention slicing on SD pipeline")
try:
pipe.enable_attention_slicing()
except Exception as e:
logger.warning("error while enabling attention slicing: %s", e)
if "diffusers-vae-slicing" in server.optimizations:
logger.debug("enabling VAE slicing on SD pipeline")
try:
pipe.enable_vae_slicing()
except Exception as e:
logger.warning("error while enabling VAE slicing: %s", e)
if "diffusers-cpu-offload-sequential" in server.optimizations:
logger.debug("enabling sequential CPU offload on SD pipeline")
try:
pipe.enable_sequential_cpu_offload()
except Exception as e:
logger.warning("error while enabling sequential CPU offload: %s", e)
elif "diffusers-cpu-offload-model" in server.optimizations:
# TODO: check for accelerate
logger.debug("enabling model CPU offload on SD pipeline")
try:
pipe.enable_model_cpu_offload()
except Exception as e:
logger.warning("error while enabling model CPU offload: %s", e)
if "diffusers-memory-efficient-attention" in server.optimizations:
# TODO: check for xformers
logger.debug("enabling memory efficient attention for SD pipeline")
try:
pipe.enable_xformers_memory_efficient_attention()
except Exception as e:
logger.warning("error while enabling memory efficient attention: %s", e)


def load_pipeline(
    server: ServerContext,
    pipeline: DiffusionPipeline,
    model: str,
    scheduler_name: str,
    device: DeviceParams,
    lpw: bool,
    inversions: Optional[List[Tuple[str, float]]] = None,
    loras: Optional[List[Tuple[str, float]]] = None,
):
    loras = loras or []
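    # anything that changes the structure of the loaded pipeline has to be part of the cache key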
    pipe_key = (
        pipeline.__name__,
        model,
        device.device,
        device.provider,
        lpw,
        inversions,
        loras,
    )
    scheduler_key = (scheduler_name, model)
    scheduler_type = get_pipeline_schedulers()[scheduler_name]

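    # reuse a cached pipeline when possible; a scheduler change alone does not force a full reload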
    cache_pipe = server.cache.get("diffusion", pipe_key)
    if cache_pipe is not None:
        logger.debug("reusing existing diffusion pipeline")
        pipe = cache_pipe

        cache_scheduler = server.cache.get("scheduler", scheduler_key)
        if cache_scheduler is None:
            logger.debug("loading new diffusion scheduler")
            scheduler = scheduler_type.from_pretrained(
                model,
                provider=device.ort_provider(),
                sess_options=device.sess_options(),
                subfolder="scheduler",
            )

            if device is not None and hasattr(scheduler, "to"):
                scheduler = scheduler.to(device.torch_str())

            pipe.scheduler = scheduler
            server.cache.set("scheduler", scheduler_key, scheduler)
            run_gc([device])
    else:
        if server.cache.drop("diffusion", pipe_key) > 0:
            logger.debug("unloading previous diffusion pipeline")
            run_gc([device])

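        # long prompt weighting uses a custom diffusers pipeline shipped with onnx-web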
        if lpw:
            custom_pipeline = "./onnx_web/diffusers/lpw_stable_diffusion_onnx.py"
        else:
            custom_pipeline = None

        logger.debug("loading new diffusion pipeline from %s", model)
        components = {
            "scheduler": scheduler_type.from_pretrained(
                model,
                provider=device.ort_provider(),
                sess_options=device.sess_options(),
                subfolder="scheduler",
            )
        }

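        # when a Textual Inversion is selected, load its text encoder and tokenizer in place of the base model's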
        if inversions is not None and len(inversions) > 0:
            inversion = "inversion-" + inversions[0][0]
            logger.debug("loading Textual Inversion from %s", inversion)
            # TODO: blend the inversion models
            components["text_encoder"] = OnnxRuntimeModel.from_pretrained(
                path.join(server.model_path, inversion, "text_encoder"),
                provider=device.ort_provider(),
                sess_options=device.sess_options(),
            )
            components["tokenizer"] = CLIPTokenizer.from_pretrained(
                path.join(server.model_path, inversion, "tokenizer"),
            )

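        # LoRA weights are merged into the ONNX graphs at load time; large tensors are buffered as
        # external data and registered through add_external_initializers so the merged model can be
        # serialized and loaded from memory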
        # test LoRA blending
        if loras is not None and len(loras) > 0:
            lora_names, lora_weights = zip(*loras)
            lora_models = [
                path.join(server.model_path, "lora", f"{name}.safetensors")
                for name in lora_names
            ]
            logger.info(
                "blending base model %s with LoRA models: %s", model, lora_models
            )

            # blend and load text encoder
            blended_text_encoder = merge_lora(
                path.join(model, "text_encoder", "model.onnx"),
                lora_models,
                "text_encoder",
                lora_weights=lora_weights,
            )
            (text_encoder_model, text_encoder_data) = buffer_external_data_tensors(
                blended_text_encoder
            )
            text_encoder_names, text_encoder_values = zip(*text_encoder_data)
            text_encoder_opts = SessionOptions()
            text_encoder_opts.add_external_initializers(
                list(text_encoder_names), list(text_encoder_values)
            )
            components["text_encoder"] = OnnxRuntimeModel(
                OnnxRuntimeModel.load_model(
                    text_encoder_model.SerializeToString(),
                    provider=device.ort_provider(),
                    sess_options=text_encoder_opts,
                )
            )

            # blend and load unet
            blended_unet = merge_lora(
                path.join(model, "unet", "model.onnx"),
                lora_models,
                "unet",
                lora_weights=lora_weights,
            )
            (unet_model, unet_data) = buffer_external_data_tensors(blended_unet)
            unet_names, unet_values = zip(*unet_data)
            unet_opts = SessionOptions()
            unet_opts.add_external_initializers(list(unet_names), list(unet_values))
            components["unet"] = OnnxRuntimeModel(
                OnnxRuntimeModel.load_model(
                    unet_model.SerializeToString(),
                    provider=device.ort_provider(),
                    sess_options=unet_opts,
                )
            )

        pipe = pipeline.from_pretrained(
            model,
            custom_pipeline=custom_pipeline,
            provider=device.ort_provider(),
            sess_options=device.sess_options(),
            revision="onnx",
            safety_checker=None,
            **components,
        )

        if not server.show_progress:
            pipe.set_progress_bar_config(disable=True)

        optimize_pipeline(server, pipe)

        if device is not None and hasattr(pipe, "to"):
            pipe = pipe.to(device.torch_str())

        # monkey-patch the prompt encoder, unless the LPW pipeline is handling prompts itself
        if not lpw:
            pipe._encode_prompt = expand_prompt.__get__(pipe, pipeline)

        server.cache.set("diffusion", pipe_key, pipe)
        server.cache.set("scheduler", scheduler_key, components["scheduler"])

    return pipe