diff --git a/api/onnx_web/chain/base.py b/api/onnx_web/chain/base.py
index 0a300ddc..a3ffb54d 100644
--- a/api/onnx_web/chain/base.py
+++ b/api/onnx_web/chain/base.py
@@ -5,6 +5,7 @@ from typing import Any, List, Optional, Tuple
 
 from PIL import Image
 
+from ..errors import RetryException
 from ..output import save_image
 from ..params import ImageParams, StageParams
 from ..server import ServerContext
@@ -177,7 +178,7 @@ class ChainPipeline:
                             run_gc([job.get_device()])
                             job.retries = job.retries - (i + 1)
 
-                    raise RuntimeError("exhausted retries on tile")
+                    raise RetryException("exhausted retries on tile")
 
                 output = process_tile_order(
                     stage_params.tile_order,
@@ -216,7 +217,7 @@ class ChainPipeline:
                         job.retries = job.retries - (i + 1)
 
                         if job.retries <= 0:
-                            raise RuntimeError("exhausted retries on stage")
+                            raise RetryException("exhausted retries on stage")
 
             logger.debug(
                 "finished stage %s with %s results",
diff --git a/api/onnx_web/errors.py b/api/onnx_web/errors.py
new file mode 100644
index 00000000..c51f2b57
--- /dev/null
+++ b/api/onnx_web/errors.py
@@ -0,0 +1,6 @@
+class RetryException(Exception):
+    """
+    Used when a chain pipeline has run out of retries.
+    """
+
+    pass
diff --git a/api/onnx_web/worker/worker.py b/api/onnx_web/worker/worker.py
index 17267cc6..8d714722 100644
--- a/api/onnx_web/worker/worker.py
+++ b/api/onnx_web/worker/worker.py
@@ -5,6 +5,7 @@ from sys import exit
 
 from setproctitle import setproctitle
 
+from ..errors import RetryException
 from ..server import ServerContext, apply_patches
 from ..torch_before_ort import get_available_providers
 from .context import WorkerContext
@@ -69,6 +70,10 @@ def worker_main(worker: WorkerContext, server: ServerContext):
             logger.info("worker got keyboard interrupt")
             worker.fail()
             exit(EXIT_INTERRUPT)
+        except RetryException as retry_err:
+            logger.info("retry error in worker, exiting: %s", retry_err)
+            worker.fail()
+            exit(EXIT_ERROR)
         except ValueError:
             logger.exception("value error in worker, exiting: %s")
             worker.fail()