# api/onnx_web/chain/source_txt2img.py

from logging import getLogger
from typing import Optional, Tuple

import numpy as np
import torch

from ..constants import LATENT_FACTOR
from ..diffusers.load import load_pipeline
from ..diffusers.utils import (
    encode_prompt,
    get_latents_from_seed,
    get_tile_latents,
    parse_prompt,
    parse_reseed,
    slice_prompt,
)
from ..params import ImageParams, Size, SizeChart, StageParams
from ..server import ServerContext
from ..worker import ProgressCallback, WorkerContext
from .base import BaseStage
from .result import ImageMetadata, StageResult

logger = getLogger(__name__)


class SourceTxt2ImgStage(BaseStage):
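    """
    Generate new images from a text prompt and append them to the results of
    any previous stages.
    """

    # txt2img renders the whole tile in one call, so this stage accepts the
    # largest tile size rather than a fixed limit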
max_tile = SizeChart.max

    def run(
self,
worker: WorkerContext,
server: ServerContext,
stage: StageParams,
params: ImageParams,
sources: StageResult,
*,
        dims: Optional[Tuple[int, int, int]] = None,
size: Size,
callback: Optional[ProgressCallback] = None,
latents: Optional[np.ndarray] = None,
prompt_index: Optional[int] = None,
**kwargs,
) -> StageResult:
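        """
        Run a txt2img pipeline with the prompt and size from `params`, adding
        the generated images to `sources`.
        """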
params = params.with_args(**kwargs)
size = size.with_args(**kwargs)
# multi-stage prompting
if prompt_index is not None:
params = params.with_args(prompt=slice_prompt(params.prompt, prompt_index))
logger.info(
"generating image using txt2img, %s steps of %s: %s",
params.steps,
params.model,
params.prompt,
)
if len(sources):
logger.info(
"source images were passed to a source stage, new images will be appended"
)
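
        # split LoRA and Textual Inversion tokens and any prompt alternatives
        # out of the raw prompt, keeping the plain positive and negative prompts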
prompt_pairs, loras, inversions, (prompt, negative_prompt) = parse_prompt(
params
)
if params.is_panorama() or params.is_xl():
tile_size = max(stage.tile_size, params.unet_tile)
else:
tile_size = params.unet_tile
# this works for panorama as well, because tile_size is already max(tile_size, *size)
latent_size = size.min(tile_size, tile_size)
# generate new latents or slice existing
if latents is None:
latents = get_latents_from_seed(int(params.seed), latent_size, params.batch)
else:
latents = get_tile_latents(latents, int(params.seed), latent_size, dims)
# reseed latents as needed
reseed_rng = np.random.RandomState(params.seed)
prompt, reseed = parse_reseed(prompt)
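        # each reseed region replaces one rectangle of the latent noise with
        # noise from its own seed; a region seed of -1 means draw a random one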
for top, left, bottom, right, region_seed in reseed:
if region_seed == -1:
                # random_integers is deprecated and overflows platforms with
                # 32-bit longs; randint over the same range is equivalent
                region_seed = reseed_rng.randint(1, 2**32, dtype=np.int64)
logger.debug(
"reseed latent region: [:, :, %s:%s, %s:%s] with %s",
top,
left,
bottom,
right,
region_seed,
)
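            # convert pixel coordinates into latent coordinates; LATENT_FACTOR
            # is the VAE downscaling factor (8 for Stable Diffusion)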
latents[
:,
:,
top // LATENT_FACTOR : bottom // LATENT_FACTOR,
left // LATENT_FACTOR : right // LATENT_FACTOR,
] = get_latents_from_seed(
region_seed, Size(right - left, bottom - top), params.batch
)
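
        # load a txt2img-capable pipeline for this device, applying any Textual
        # Inversion embeddings and LoRA weights collected from the prompt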
pipe_type = params.get_valid_pipeline("txt2img")
pipe = load_pipeline(
server,
params,
pipe_type,
worker.get_device(),
embeddings=inversions,
loras=loras,
)
if params.is_lpw():
logger.debug("using LPW pipeline for txt2img")
rng = torch.manual_seed(params.seed)
output = pipe.text2img(
prompt,
height=latent_size.height,
width=latent_size.width,
generator=rng,
guidance_scale=params.cfg,
latents=latents,
negative_prompt=negative_prompt,
num_images_per_prompt=params.batch,
num_inference_steps=params.steps,
eta=params.eta,
callback=callback,
)
else:
# encode and record alternative prompts outside of LPW
if params.is_panorama() or params.is_xl():
logger.debug(
"prompt alternatives are not supported for panorama or SDXL"
)
else:
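                # pre-encode the prompt alternatives and hand the embeddings to
                # the wrapped UNet (set_prompts is onnx-web's UNet wrapper API)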
prompt_embeds = encode_prompt(
pipe, prompt_pairs, params.batch, params.do_cfg()
)
pipe.unet.set_prompts(prompt_embeds)
rng = np.random.RandomState(params.seed)
output = pipe(
prompt,
height=latent_size.height,
width=latent_size.width,
generator=rng,
guidance_scale=params.cfg,
latents=latents,
negative_prompt=negative_prompt,
num_images_per_prompt=params.batch,
num_inference_steps=params.steps,
eta=params.eta,
callback=callback,
)
result = StageResult(source=sources)
for image in output.images:
result.push_image(
image, ImageMetadata(params, size, inversions=inversions, loras=loras)
)
logger.debug("produced %s outputs", len(result))
return result

    def steps(
self,
params: ImageParams,
size: Size,
) -> int:
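        """
        Report the expected number of progress steps: one full diffusion run.
        """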
return params.steps

    def outputs(
self,
params: ImageParams,
sources: int,
) -> int:
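        """
        Report the expected number of outputs: the incoming sources plus one
        new image.
        """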
return sources + 1