diff --git a/api/onnx_web/worker/pool.py b/api/onnx_web/worker/pool.py
index cd78051d..34e5b70c 100644
--- a/api/onnx_web/worker/pool.py
+++ b/api/onnx_web/worker/pool.py
@@ -135,10 +135,12 @@ class DevicePoolExecutor:
             if proc.is_alive():
                 logger.debug("shutting down worker for device %s", name)
                 proc.join(5)
+                proc.terminate()
             else:
                 logger.warning("worker for device %s has died", name)
 
             self.workers[name] = None
+            del proc
 
         logger.info("starting new workers")
 
@@ -161,6 +163,7 @@ class DevicePoolExecutor:
         **kwargs,
     ) -> None:
         self.job_count += 1
+        logger.debug("pool job count: %s", self.job_count)
         if self.job_count > 10:
             self.recycle()
             self.job_count = 0
diff --git a/api/onnx_web/worker/worker.py b/api/onnx_web/worker/worker.py
index 4edfb4bb..07c6bb02 100644
--- a/api/onnx_web/worker/worker.py
+++ b/api/onnx_web/worker/worker.py
@@ -3,6 +3,7 @@ import torch # has to come before ORT
 from onnxruntime import get_available_providers
 from torch.multiprocessing import Lock, Queue
 from traceback import format_exception
+from setproctitle import setproctitle
 
 from .context import WorkerContext
 from ..server import ServerContext, apply_patches
@@ -14,6 +15,8 @@ def logger_init(lock: Lock, logs: Queue):
     with lock:
         logger.info("checking in from logger, %s", lock)
 
+    setproctitle("onnx-web logger")
+
     while True:
         job = logs.get()
         with open("worker.log", "w") as f:
@@ -26,6 +29,8 @@ def worker_init(lock: Lock, context: WorkerContext, server: ServerContext):
         logger.info("checking in from worker, %s, %s", lock, get_available_providers())
 
     apply_patches(server)
+    # setproctitle() accepts a single title string, so format the device in first
+    setproctitle("onnx-web worker: %s" % (context.device.device,))
 
     while True:
         job = context.pending.get()
diff --git a/api/requirements.txt b/api/requirements.txt
index e08b7c31..2ac35af8 100644
--- a/api/requirements.txt
+++ b/api/requirements.txt
@@ -23,3 +23,4 @@ flask
 flask-cors
 jsonschema
 pyyaml
+setproctitle
\ No newline at end of file
diff --git a/api/scripts/test-memory-leak.sh b/api/scripts/test-memory-leak.sh
index 5e432e43..d7545e60 100755
--- a/api/scripts/test-memory-leak.sh
+++ b/api/scripts/test-memory-leak.sh
@@ -14,5 +14,5 @@ do
     --insecure || break;
   ((test_images++));
   echo "waiting after $test_images";
-  sleep 10;
+  sleep 30;
 done