feat(api): add img2img and inpaint chain stages

2023-01-28 12:42:02 -06:00 · 2023-01-28 12:42:02 -06:00 · dcbd059082
parent 4188b019a1
commit dcbd059082
13 changed files with 209 additions and 48 deletions
--- a/api/onnx_web/chain/init.py
+++ b/api/onnx_web/chain/init.py
@ -4,15 +4,24 @@ from .base import (
  StageCallback,
  StageParams,
 )
+from .blend_img2img import (
+  blend_img2img,
+)
+from .blend_inpaint import (
+  blend_inpaint,
+)
 from .correct_gfpgan import (
  correct_gfpgan,
 )
-from .generate_txt2img import (
-  generate_txt2img,
-)
 from .persist_disk import (
  persist_disk,
 )
+from .persist_s3 import (
+  persist_s3,
+)
+from .source_txt2img import (
+  source_txt2img,
+)
 from .upscale_outpaint import (
  upscale_outpaint,
 )
--- a/api/onnx_web/chain/base.py
+++ b/api/onnx_web/chain/base.py
@ -7,6 +7,7 @@ from ..params import (
    StageParams,
 )
 from ..utils import (
+    is_debug,
    ServerContext,
 )
 from .utils import (
@ -68,10 +69,13 @@ class ChainPipeline:
                print('source image larger than tile size of %s, tiling stage' % (
                    stage_params.tile_size))

-                def stage_tile(tile: Image.Image) -> Image.Image:
+                def stage_tile(tile: Image.Image, _dims) -> Image.Image:
                    tile = stage_pipe(ctx, stage_params, params, tile,
                                      **kwargs)
+
+                    if is_debug():
                        tile.save(path.join(ctx.output_path, 'last-tile.png'))
+
                    return tile

                image = process_tiles(
--- a/api/onnx_web/chain/blend_img2img.py
+++ b/api/onnx_web/chain/blend_img2img.py
@ -0,0 +1,48 @@
+from diffusers import (
+    OnnxStableDiffusionImg2ImgPipeline,
+)
+from PIL import Image
+
+from ..diffusion import (
+    load_pipeline,
+)
+from ..params import (
+    ImageParams,
+    StageParams,
+)
+from ..utils import (
+    ServerContext,
+)
+
+import numpy as np
+
+
+def blend_img2img(
+    ctx: ServerContext,
+    stage: StageParams,
+    params: ImageParams,
+    source_image: Image.Image,
+    *,
+    strength: float,
+) -> Image.Image:
+    print('generating image using img2img', params.prompt)
+
+    pipe = load_pipeline(OnnxStableDiffusionImg2ImgPipeline,
+                            params.model, params.provider, params.scheduler)
+
+    rng = np.random.RandomState(params.seed)
+
+    result = pipe(
+        params.prompt,
+        generator=rng,
+        guidance_scale=params.cfg,
+        image=source_image,
+        negative_prompt=params.negative_prompt,
+        num_inference_steps=params.steps,
+        strength=strength,
+    )
+    output = result.images[0]
+
+    print('final output image size', output.size)
+    return output
+
--- a/api/onnx_web/chain/blend_inpaint.py
+++ b/api/onnx_web/chain/blend_inpaint.py
@ -0,0 +1,99 @@
+from diffusers import (
+    OnnxStableDiffusionInpaintPipeline,
+)
+from PIL import Image
+from typing import Callable, Tuple
+
+from ..diffusion import (
+    get_latents_from_seed,
+    load_pipeline,
+)
+from ..image import (
+    expand_image,
+    mask_filter_none,
+    noise_source_histogram,
+)
+from ..params import (
+    Border,
+    ImageParams,
+    Size,
+    StageParams,
+)
+from ..utils import (
+    base_join,
+    is_debug,
+    ServerContext,
+)
+from .utils import (
+    process_tiles,
+)
+
+import numpy as np
+
+
+def blend_inpaint(
+    ctx: ServerContext,
+    stage: StageParams,
+    params: ImageParams,
+    source_image: Image.Image,
+    *,
+    expand: Border,
+    mask_image: Image.Image = None,
+    fill_color: str = 'white',
+    mask_filter: Callable = mask_filter_none,
+    noise_source: Callable = noise_source_histogram,
+) -> Image.Image:
+    print('upscaling image by expanding borders', expand)
+
+    if mask_image is None:
+        # if no mask was provided, keep the full source image
+        mask_image = Image.new('RGB', source_image.size, 'black')
+
+    source_image, mask_image, noise_image, _full_dims = expand_image(
+        source_image,
+        mask_image,
+        expand,
+        fill=fill_color,
+        noise_source=noise_source,
+        mask_filter=mask_filter)
+
+    if is_debug():
+        source_image.save(base_join(ctx.output_path, 'last-source.png'))
+        mask_image.save(base_join(ctx.output_path, 'last-mask.png'))
+        noise_image.save(base_join(ctx.output_path, 'last-noise.png'))
+
+    def outpaint(image: Image.Image, dims: Tuple[int, int, int]):
+        left, top, tile = dims
+        size = Size(*image.size)
+        mask = mask_image.crop((left, top, left + tile, top + tile))
+
+        if is_debug():
+            image.save(base_join(ctx.output_path, 'tile-source.png'))
+            mask.save(base_join(ctx.output_path, 'tile-mask.png'))
+
+        # TODO: must use inpainting model here
+        model = '../models/stable-diffusion-onnx-v1-inpainting'
+        pipe = load_pipeline(OnnxStableDiffusionInpaintPipeline,
+                             model, params.provider, params.scheduler)
+
+        latents = get_latents_from_seed(params.seed, size)
+        rng = np.random.RandomState(params.seed)
+
+        result = pipe(
+            params.prompt,
+            generator=rng,
+            guidance_scale=params.cfg,
+            height=size.height,
+            image=image,
+            latents=latents,
+            mask_image=mask,
+            negative_prompt=params.negative_prompt,
+            num_inference_steps=params.steps,
+            width=size.width,
+        )
+        return result.images[0]
+
+    output = process_tiles(source_image, 512, 1, [outpaint])
+
+    print('final output image size', output.size)
+    return output
--- a/api/onnx_web/chain/correct_gfpgan.py
+++ b/api/onnx_web/chain/correct_gfpgan.py
@ -57,7 +57,7 @@ def correct_gfpgan(
    *,
    upscale: UpscaleParams,
    upsampler: Optional[RealESRGANer] = None,
-) -> Image:
+) -> Image.Image:
    if upscale.correction_model is None:
        print('no face model given, skipping')
        return image
--- a/api/onnx_web/chain/persist_disk.py
+++ b/api/onnx_web/chain/persist_disk.py
@ -13,8 +13,8 @@ from ..utils import (

 def persist_disk(
    ctx: ServerContext,
-    stage: StageParams,
-    params: ImageParams,
+    _stage: StageParams,
+    _params: ImageParams,
    source_image: Image.Image,
    *,
    output: str,
--- a/api/onnx_web/chain/generate_txt2img.py
+++ b/api/onnx_web/chain/generate_txt2img.py
@ -2,43 +2,31 @@ from diffusers import (
    OnnxStableDiffusionPipeline,
 )
 from PIL import Image
-from typing import Callable

 from ..diffusion import (
    get_latents_from_seed,
    load_pipeline,
 )
-from ..image import (
-    expand_image,
-    mask_filter_none,
-    noise_source_histogram,
-)
 from ..params import (
-    Border,
    ImageParams,
    Size,
    StageParams,
 )
 from ..utils import (
-    base_join,
-    is_debug,
    ServerContext,
 )
-from .utils import (
-    process_tiles,
-)

 import numpy as np


-def generate_txt2img(
+def source_txt2img(
    ctx: ServerContext,
    stage: StageParams,
    params: ImageParams,
    source_image: Image.Image,
    *,
    size: Size,
-) -> Image:
+) -> Image.Image:
    print('generating image using txt2img', params.prompt)

    if source_image is not None:
--- a/api/onnx_web/chain/upscale_resrgan.py
+++ b/api/onnx_web/chain/upscale_resrgan.py
@ -75,7 +75,7 @@ def upscale_resrgan(
    source_image: Image.Image,
    *,
    upscale: UpscaleParams,
-) -> Image:
+) -> Image.Image:
    print('upscaling image with Real ESRGAN', upscale.scale)

    output = np.array(source_image)
--- a/api/onnx_web/chain/utils.py
+++ b/api/onnx_web/chain/utils.py
@ -1,13 +1,18 @@
 from PIL import Image
-from typing import Callable, List
+from typing import List, Protocol, Tuple
+
+
+class TileCallback(Protocol):
+    def __call__(self, image: Image.Image, dims: Tuple[int, int, int]) -> Image.Image:
+        pass


 def process_tiles(
    source: Image.Image,
    tile: int,
    scale: int,
-    filters: List[Callable],
-) -> Image:
+    filters: List[TileCallback],
+) -> Image.Image:
    width, height = source.size
    image = Image.new('RGB', (width * scale, height * scale))

--- a/api/onnx_web/diffusion.py
+++ b/api/onnx_web/diffusion.py
@ -104,7 +104,7 @@ def run_txt2img_pipeline(
    size: Size,
    output: str,
    upscale: UpscaleParams
-):
+) -> None:
    pipe = load_pipeline(OnnxStableDiffusionPipeline,
                         params.model, params.provider, params.scheduler)

@ -139,9 +139,9 @@ def run_img2img_pipeline(
    params: ImageParams,
    output: str,
    upscale: UpscaleParams,
-    source_image: Image,
+    source_image: Image.Image,
    strength: float,
-):
+) -> None:
    pipe = load_pipeline(OnnxStableDiffusionImg2ImgPipeline,
                         params.model, params.provider, params.scheduler)

@ -176,14 +176,14 @@ def run_inpaint_pipeline(
    size: Size,
    output: str,
    upscale: UpscaleParams,
-    source_image: Image,
-    mask_image: Image,
+    source_image: Image.Image,
+    mask_image: Image.Image,
    expand: Border,
    noise_source: Any,
    mask_filter: Any,
    strength: float,
    fill_color: str,
-):
+) -> None:
    pipe = load_pipeline(OnnxStableDiffusionInpaintPipeline,
                         params.model, params.provider, params.scheduler)

@ -241,8 +241,8 @@ def run_upscale_pipeline(
    _size: Size,
    output: str,
    upscale: UpscaleParams,
-    source_image: Image
-):
+    source_image: Image.Image,
+) -> None:
    image = run_upscale_correction(
        ctx, StageParams(), params, source_image, upscale=upscale)

--- a/api/onnx_web/image.py
+++ b/api/onnx_web/image.py
@ -1,6 +1,5 @@
 from numpy import random
 from PIL import Image, ImageChops, ImageFilter
-from typing import Callable, List

 import numpy as np

@ -14,7 +13,7 @@ def get_pixel_index(x: int, y: int, width: int) -> int:
    return (y * width) + x


-def mask_filter_none(mask_image: Image, dims: Point, origin: Point, fill='white', **kw) -> Image:
+def mask_filter_none(mask_image: Image.Image, dims: Point, origin: Point, fill='white', **kw) -> Image.Image:
    width, height = dims

    noise = Image.new('RGB', (width, height), fill)
@ -23,7 +22,7 @@ def mask_filter_none(mask_image: Image, dims: Point, origin: Point, fill='white'
    return noise


-def mask_filter_gaussian_multiply(mask_image: Image, dims: Point, origin: Point, rounds=3, **kw) -> Image:
+def mask_filter_gaussian_multiply(mask_image: Image.Image, dims: Point, origin: Point, rounds=3, **kw) -> Image.Image:
    '''
    Gaussian blur with multiply, source image centered on white canvas.
    '''
@ -36,7 +35,7 @@ def mask_filter_gaussian_multiply(mask_image: Image, dims: Point, origin: Point,
    return noise


-def mask_filter_gaussian_screen(mask_image: Image, dims: Point, origin: Point, rounds=3, **kw) -> Image:
+def mask_filter_gaussian_screen(mask_image: Image.Image, dims: Point, origin: Point, rounds=3, **kw) -> Image.Image:
    '''
    Gaussian blur, source image centered on white canvas.
    '''
@ -49,7 +48,7 @@ def mask_filter_gaussian_screen(mask_image: Image, dims: Point, origin: Point, r
    return noise


-def noise_source_fill_edge(source_image: Image, dims: Point, origin: Point, fill='white', **kw) -> Image:
+def noise_source_fill_edge(source_image: Image.Image, dims: Point, origin: Point, fill='white', **kw) -> Image.Image:
    '''
    Identity transform, source image centered on white canvas.
    '''
@ -61,7 +60,7 @@ def noise_source_fill_edge(source_image: Image, dims: Point, origin: Point, fill
    return noise


-def noise_source_fill_mask(source_image: Image, dims: Point, origin: Point, fill='white', **kw) -> Image:
+def noise_source_fill_mask(source_image: Image.Image, dims: Point, origin: Point, fill='white', **kw) -> Image.Image:
    '''
    Fill the whole canvas, no source or noise.
    '''
@ -72,7 +71,7 @@ def noise_source_fill_mask(source_image: Image, dims: Point, origin: Point, fill
    return noise


-def noise_source_gaussian(source_image: Image, dims: Point, origin: Point, rounds=3, **kw) -> Image:
+def noise_source_gaussian(source_image: Image.Image, dims: Point, origin: Point, rounds=3, **kw) -> Image.Image:
    '''
    Gaussian blur, source image centered on white canvas.
    '''
@ -85,7 +84,7 @@ def noise_source_gaussian(source_image: Image, dims: Point, origin: Point, round
    return noise


-def noise_source_uniform(source_image: Image, dims: Point, origin: Point, **kw) -> Image:
+def noise_source_uniform(source_image: Image.Image, dims: Point, origin: Point, **kw) -> Image.Image:
    width, height = dims
    size = width * height

@ -107,7 +106,7 @@ def noise_source_uniform(source_image: Image, dims: Point, origin: Point, **kw)
    return noise


-def noise_source_normal(source_image: Image, dims: Point, origin: Point, **kw) -> Image:
+def noise_source_normal(source_image: Image.Image, dims: Point, origin: Point, **kw) -> Image.Image:
    width, height = dims
    size = width * height

@ -129,7 +128,7 @@ def noise_source_normal(source_image: Image, dims: Point, origin: Point, **kw) -
    return noise


-def noise_source_histogram(source_image: Image, dims: Point, origin: Point, **kw) -> Image:
+def noise_source_histogram(source_image: Image.Image, dims: Point, origin: Point, **kw) -> Image.Image:
    r, g, b = source_image.split()
    width, height = dims
    size = width * height
@ -161,8 +160,8 @@ def noise_source_histogram(source_image: Image, dims: Point, origin: Point, **kw

 # very loosely based on https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/master/scripts/outpainting_mk_2.py#L175-L232
 def expand_image(
-        source_image: Image,
-        mask_image: Image,
+        source_image: Image.Image,
+        mask_image: Image.Image,
        expand: Border,
        fill='white',
        noise_source=noise_source_histogram,
--- a/api/onnx_web/serve.py
+++ b/api/onnx_web/serve.py
@ -24,8 +24,9 @@ from typing import Tuple

 from .chain import (
    correct_gfpgan,
-    generate_txt2img,
+    source_txt2img,
    persist_disk,
+    persist_s3,
    upscale_outpaint,
    upscale_resrgan,
    upscale_stable_diffusion,
@ -546,7 +547,7 @@ def chain():

    # parse body as json, list of stages
    example = ChainPipeline(stages=[
-        (generate_txt2img, StageParams(), {
+        (source_txt2img, StageParams(), {
            'size': size,
        }),
        (upscale_outpaint, StageParams(), {
@ -561,6 +562,10 @@ def chain():
        (persist_disk, StageParams(tile_size=8192), {
            'output': output,
        }),
+        (persist_s3, StageParams(tile_size=8192), {
+            'bucket': 'storage-stable-diffusion',
+            'output': output,
+        }),
    ])

    # build and run chain pipeline
--- a/api/onnx_web/upscale.py
+++ b/api/onnx_web/upscale.py
@ -24,6 +24,10 @@ def run_upscale_correction(
    *,
    upscale: UpscaleParams,
 ) -> Image.Image:
+    '''
+    This is a convenience method for a chain pipeline that will run upscaling and
+    correction, based on the `upscale` params.
+    '''
    print('running upscale pipeline')

    chain = ChainPipeline()