chore(api): automate more of the release test plan

2023-02-19 15:00:23 -06:00 · 2023-02-19 15:00:23 -06:00 · 62a6f14cb5
parent ad08349fbe
commit 62a6f14cb5
13 changed files with 190 additions and 44 deletions
--- a/api/scripts/test-refs/img2img-sd-v1-5-256-pumpkin.png
+++ b/api/scripts/test-refs/img2img-sd-v1-5-256-pumpkin.png
--- a/api/scripts/test-refs/img2img-sd-v1-5-512-pumpkin.png
+++ b/api/scripts/test-refs/img2img-sd-v1-5-512-pumpkin.png
--- a/api/scripts/test-refs/inpaint-v1-512-black.png
+++ b/api/scripts/test-refs/inpaint-v1-512-black.png
--- a/api/scripts/test-refs/inpaint-v1-512-white.png
+++ b/api/scripts/test-refs/inpaint-v1-512-white.png
--- a/api/scripts/test-refs/mask-black.png
+++ b/api/scripts/test-refs/mask-black.png
--- a/api/scripts/test-refs/mask-white.png
+++ b/api/scripts/test-refs/mask-white.png
--- a/api/scripts/test-refs/outpaint-even-256.png
+++ b/api/scripts/test-refs/outpaint-even-256.png
--- a/api/scripts/test-refs/outpaint-horizontal-512.png
+++ b/api/scripts/test-refs/outpaint-horizontal-512.png
--- a/api/scripts/test-refs/outpaint-vertical-512.png
+++ b/api/scripts/test-refs/outpaint-vertical-512.png
--- a/api/scripts/test-refs/upscale-resrgan-x2-1024-muffin.png
+++ b/api/scripts/test-refs/upscale-resrgan-x2-1024-muffin.png
--- a/api/scripts/test-refs/upscale-resrgan-x4-2048-muffin.png
+++ b/api/scripts/test-refs/upscale-resrgan-x4-2048-muffin.png
--- a/api/scripts/test-release.py
+++ b/api/scripts/test-release.py
@ -1,12 +1,12 @@
 import sys
 import traceback
 from collections import Counter
 from io import BytesIO
 from logging import getLogger
 from logging.config import dictConfig
 from os import environ, path
 from time import sleep
 from typing import Optional
 from collections import Counter
 import cv2
 import numpy as np
@ -14,18 +14,46 @@ import requests
 from PIL import Image
 from yaml import safe_load
 # Best-effort logging setup: the config file location comes from the
 # ONNX_WEB_LOGGING_PATH environment variable, defaulting to ./logging.yaml.
 logging_path = environ.get("ONNX_WEB_LOGGING_PATH", "./logging.yaml")
 try:
    # A missing config file is not an error; the dictConfig call is simply skipped.
    if path.exists(logging_path):
        with open(logging_path, "r") as f:
            # Parse the file as YAML and hand the result to logging's dictConfig.
            config_logging = safe_load(f)
            dictConfig(config_logging)
 except Exception as err:
    # Any failure while reading or applying the config is reported and the
    # script carries on. print() is used because the module logger is only
    # created after this block.
    print("error loading logging config: %s" % (err))
 # Module-level logger used by the rest of the script.
 logger = getLogger(__name__)
 def test_root() -> str:
    """
    Return the base URL of the API under test.

    The first command-line argument is used when one was given; otherwise
    the local default "http://127.0.0.1:5000" is returned.
    """
    cli_args = sys.argv[1:]
    return cli_args[0] if cli_args else "http://127.0.0.1:5000"
 def test_path(relpath: str) -> str:
    return path.join(path.dirname(__file__), relpath)
 class TestCase:
    def __init__(
        self,
        name: str,
        query: str,
        max_attempts: int = 20,
-        mse_threshold: float = 0.0001,
+        mse_threshold: float = 0.001,
        source: Image.Image = None,
        mask: Image.Image = None,
    ) -> None:
        self.name = name
        self.query = query
        self.max_attempts = max_attempts
        self.mse_threshold = mse_threshold
        self.source = source
        self.mask = mask
 TEST_DATA = [
@ -65,34 +93,93 @@ TEST_DATA = [
        "txt2img-knollingcase-512-muffin",
        "txt2img?prompt=knollingcase+display+case+with+a+giant+muffin&seed=0&scheduler=ddim&model=diffusion-knollingcase",
    ),
    TestCase(
        "img2img-sd-v1-5-512-pumpkin",
        "img2img?prompt=a+giant+pumpkin&seed=0&scheduler=ddim",
        source="txt2img-sd-v1-5-512-muffin",
    ),
    TestCase(
        "img2img-sd-v1-5-256-pumpkin",
        "img2img?prompt=a+giant+pumpkin&seed=0&scheduler=ddim",
        source="txt2img-sd-v1-5-256-muffin",
    ),
    TestCase(
        "inpaint-v1-512-white",
        "inpaint?prompt=a+giant+pumpkin&seed=0&scheduler=ddim&model=stable-diffusion-onnx-v1-inpainting",
        source="txt2img-sd-v1-5-512-muffin",
        mask="mask-white",
    ),
    TestCase(
        "inpaint-v1-512-black",
        "inpaint?prompt=a+giant+pumpkin&seed=0&scheduler=ddim&model=stable-diffusion-onnx-v1-inpainting",
        source="txt2img-sd-v1-5-512-muffin",
        mask="mask-black",
    ),
    TestCase(
        "outpaint-even-256",
        (
            "inpaint?prompt=a+giant+pumpkin&seed=0&scheduler=ddim&model=stable-diffusion-onnx-v1-inpainting&noise=fill-mask"
            "&top=256&bottom=256&left=256&right=256"
        ),
        source="txt2img-sd-v1-5-512-muffin",
        mask="mask-black",
        mse_threshold=0.025,
    ),
    TestCase(
        "outpaint-vertical-512",
        (
            "inpaint?prompt=a+giant+pumpkin&seed=0&scheduler=ddim&model=stable-diffusion-onnx-v1-inpainting&noise=fill-mask"
            "&top=512&bottom=512&left=0&right=0"
        ),
        source="txt2img-sd-v1-5-512-muffin",
        mask="mask-black",
        mse_threshold=0.025,
    ),
    TestCase(
        "outpaint-horizontal-512",
        (
            "inpaint?prompt=a+giant+pumpkin&seed=0&scheduler=ddim&model=stable-diffusion-onnx-v1-inpainting&noise=fill-mask"
            "&top=0&bottom=0&left=512&right=512"
        ),
        source="txt2img-sd-v1-5-512-muffin",
        mask="mask-black",
        mse_threshold=0.025,
    ),
    TestCase(
        "upscale-resrgan-x4-2048-muffin",
        "upscale?prompt=a+giant+pumpkin&seed=0&scheduler=ddim&upscaling=upscaling-real-esrgan-x4-plus&scale=4&outscale=4",
        source="txt2img-sd-v1-5-512-muffin",
    ),
    TestCase(
        "upscale-resrgan-x2-1024-muffin",
        "upscale?prompt=a+giant+pumpkin&seed=0&scheduler=ddim&upscaling=upscaling-real-esrgan-x2-plus&scale=2&outscale=2",
        source="txt2img-sd-v1-5-512-muffin",
    ),
 ]
 logging_path = environ.get("ONNX_WEB_LOGGING_PATH", "./logging.yaml")
-try:
+def generate_image(root: str, test: TestCase) -> Optional[str]:
-    if path.exists(logging_path):
+    files = {}
-        with open(logging_path, "r") as f:
+    if test.source is not None:
-            config_logging = safe_load(f)
+        logger.debug("loading test source: %s", test.source)
-            dictConfig(config_logging)
+        source_path = test_path(path.join("test-refs", f"{test.source}.png"))
-except Exception as err:
+        source_image = Image.open(source_path)
-    print("error loading logging config: %s" % (err))
+        source_bytes = BytesIO()
        source_image.save(source_bytes, "png")
        source_bytes.seek(0)
        files["source"] = source_bytes
-logger = getLogger(__name__)
+    if test.mask is not None:
        logger.debug("loading test mask: %s", test.mask)
        mask_path = test_path(path.join("test-refs", f"{test.mask}.png"))
        mask_image = Image.open(mask_path)
        mask_bytes = BytesIO()
        mask_image.save(mask_bytes, "png")
        mask_bytes.seek(0)
        files["mask"] = mask_bytes
-
+    logger.debug("generating image: %s", test.query)
-def test_root() -> str:
+    resp = requests.post(f"{root}/api/{test.query}", files=files)
    if len(sys.argv) > 1:
        return sys.argv[1]
    else:
        return "http://127.0.0.1:5000"
 def test_path(relpath: str) -> str:
    return path.join(path.dirname(__file__), relpath)
 def generate_image(root: str, params: str) -> Optional[str]:
    resp = requests.post(f"{root}/api/{params}")
    if resp.status_code == 200:
        json = resp.json()
        return json.get("output")
@ -114,6 +201,7 @@ def check_ready(root: str, key: str) -> bool:
 def download_image(root: str, key: str) -> Image.Image:
    resp = requests.get(f"{root}/output/{key}")
    if resp.status_code == 200:
        logger.debug("downloading image: %s", key)
        return Image.open(BytesIO(resp.content))
    else:
        logger.warning("request failed: %s", resp.status_code)
@ -132,10 +220,11 @@ def find_mse(result: Image.Image, ref: Image.Image) -> float:
    nd_result = np.array(result)
    nd_ref = np.array(ref)
-    diff = cv2.subtract(nd_ref, nd_result)
+    # dividing before squaring reduces the error into the lower end of the [0, 1] range
    diff = cv2.subtract(nd_ref, nd_result) / 255.0
    diff = np.sum(diff**2)
-    return diff / (float(ref.height * ref.width)) / 255.0
+    return diff / (float(ref.height * ref.width))
 def run_test(
@ -147,16 +236,19 @@ def run_test(
    Generate an image, wait for it to be ready, and calculate the MSE from the reference.
    """
-    logger.info("running test: %s", test.query)
+    key = generate_image(root, test)
    key = generate_image(root, test.query)
    if key is None:
        raise ValueError("could not generate")
    attempts = 0
-    while attempts < test.max_attempts and not check_ready(root, key):
+    while attempts < test.max_attempts:
-        logger.debug("waiting for image to be ready")
+        if check_ready(root, key):
-        sleep(6)
+            logger.debug("image is ready: %s", key)
            break
        else:
            logger.debug("waiting for image to be ready")
            attempts += 1
            sleep(6)
    if attempts == test.max_attempts:
        raise ValueError("image was not ready in time")
@ -166,7 +258,7 @@ def run_test(
    mse = find_mse(result, ref)
    if mse < test.mse_threshold:
-        logger.debug("MSE within threshold: %.4f < %.4f", mse, test.mse_threshold)
+        logger.info("MSE within threshold: %.4f < %.4f", mse, test.mse_threshold)
        return True
    else:
        logger.warning("MSE above threshold: %.4f > %.4f", mse, test.mse_threshold)
@ -177,29 +269,29 @@ def main():
    root = test_root()
    logger.info("running release tests against API: %s", root)
-    results = Counter({
+    passed = []
-        True: 0,
+    failed = []
        False: 0,
    })
    for test in TEST_DATA:
        try:
            logger.info("starting test: %s", test.name)
            ref_name = test_path(path.join("test-refs", f"{test.name}.png"))
            ref = Image.open(ref_name) if path.exists(ref_name) else None
            if run_test(root, test, ref):
                logger.info("test passed: %s", test.name)
-                results[True] += 1
+                passed.append(test.name)
            else:
                logger.warning("test failed: %s", test.name)
-                results[False] += 1
+                failed.append(test.name)
        except Exception as e:
            traceback.print_exception(type(e), e, e.__traceback__)
            logger.error("error running test for %s: %s", test.name, e)
-            results[False] += 1
+            failed.append(test.name)
-    logger.info("%s of %s tests passed", results[True], results[True] + results[False])
+    logger.info("%s of %s tests passed", len(passed), len(TEST_DATA))
-    if results[False] > 0:
+    if len(failed) > 0:
-        logger.error("%s tests had errors", results[False])
+        logger.error("%s tests had errors", len(failed))
        sys.exit(1)
 if __name__ == "__main__":
    main()
--- a/docs/dev-test.md
+++ b/docs/dev-test.md
@ -65,46 +65,63 @@ Testing:
 - txt2img
  - 256x256 with SD v1.5
    - [ ] should fail: neon blobs
    - has automation
  - 512x512 with SD v1.5
    - DEIS Multi
      - [ ] should work
    - DPM Multi
      - [ ] should work
      - has automation
    - Euler A
      - [ ] should work
    - Heun
      - [ ] should work
      - has automation
  - 512x512 with SD v2.1
    - [ ] should work
    - has automation
  - 768x768 with SD v2.1
    - [ ] should work, given sufficient memory
    - has automation
  - extra models
    - 512x512 with Knollingcase
      - [ ] should work
      - has automation
    - 512x512 with OpenJourney
      - [ ] should work
      - has automation
    - 256x256 with OpenJourney
      - [ ] should work
 - img2img
  - 256x256 input
    - [ ] should fail: neon blobs
    - has automation
  - 512x512 input
    - [ ] should work
    - has automation
  - 1024x768 input
    - [ ] should work
 - inpaint
  - regular inpaint
    - black mask
      - [ ] should keep all pixels, same image
      - has automation
    - white mask
      - [ ] should replace all pixels, different image
      - has automation
  - outpaint
    - 0 all sides
      - [ ] should work, run 1 tile
      - primarily a client-side test
    - 256 all sides
      - [ ] should work, run 8 tiles
      - has automation
    - 512 top and bottom, 0 left and right
      - [ ] should work, run 3 tiles
      - has automation
    - 512 left and right, 0 top and bottom
      - [ ] should work, run 3 tiles
      - has automation
 - upscale
  - Real ESRGAN
    - x4 with CodeFormer
@ -113,6 +130,10 @@ Testing:
      - [ ] should work
    - x4 without face correction
      - [ ] should work
      - has automation
    - x2 without face correction
      - [ ] should work
      - has automation
    - x2 model and x4 scale
      - [ ] should sort of work: ignores scale and uses x2
    - x4 model and x2 scale