diff --git a/api/onnx_web/utils.py b/api/onnx_web/utils.py
index 54c913e8..4755c0ee 100644
--- a/api/onnx_web/utils.py
+++ b/api/onnx_web/utils.py
@@ -97,10 +97,12 @@ def run_gc(devices: Optional[List[DeviceParams]] = None):
                 torch.cuda.empty_cache()
                 torch.cuda.ipc_collect()
                 mem_free, mem_total = torch.cuda.mem_get_info()
+                mem_pct = (1 - (mem_free / mem_total)) * 100
                 logger.debug(
-                    "remaining CUDA VRAM usage: %s of %s",
+                    "CUDA VRAM usage: %s of %s (%.2f%%)",
                     (mem_total - mem_free),
                     mem_total,
+                    mem_pct,
                 )
diff --git a/api/onnx_web/worker/pool.py b/api/onnx_web/worker/pool.py
index d9b838da..60d0930f 100644
--- a/api/onnx_web/worker/pool.py
+++ b/api/onnx_web/worker/pool.py
@@ -291,7 +291,9 @@ class DevicePoolExecutor:
             logger.debug("shutting down worker for device %s", device)
             worker.join(self.join_timeout)
             if worker.is_alive():
-                logger.error("leaking worker for device %s could not be shut down", device)
+                logger.error(
+                    "leaking worker for device %s could not be shut down", device
+                )
 
         self.leaking[:] = [dw for dw in self.leaking if dw[1].is_alive()]
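
Usage note (not part of the patch): the utils.py hunk derives the used-VRAM percentage from torch.cuda.mem_get_info(), which returns (bytes free, bytes total) for the current CUDA device. Below is a minimal standalone sketch of the same calculation, assuming a CUDA device is available; the helper name vram_usage_pct is hypothetical and not part of onnx-web.

import torch

def vram_usage_pct() -> float:
    # mem_get_info() returns (bytes free, bytes total) for the current device
    mem_free, mem_total = torch.cuda.mem_get_info()
    # fraction used = 1 - free/total, scaled to a percentage
    return (1 - (mem_free / mem_total)) * 100

if torch.cuda.is_available():
    print(f"CUDA VRAM usage: {vram_usage_pct():.2f}%")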