1
0
Fork 0
onnx-web/api/onnx_web/worker/context.py

244 lines
7.0 KiB
Python
Raw Normal View History

2023-02-26 05:49:39 +00:00
from logging import getLogger
from os import getpid
from typing import Any, Callable, Optional
2023-02-26 05:49:39 +00:00
import numpy as np
2023-02-26 20:15:30 +00:00
from torch.multiprocessing import Queue, Value
from ..errors import CancelledException
2023-02-26 05:49:39 +00:00
from ..params import DeviceParams
2024-01-05 01:09:52 +00:00
from .command import JobCommand, JobStatus, Progress, ProgressCommand
2023-02-26 05:49:39 +00:00
# Module-level logger, named after this module.
logger = getLogger(__name__)
# Callable type for progress callbacks: takes two ints and a numpy array and
# returns nothing. The on_progress closure in this file names the arguments
# (step, timestep, latents).
ProgressCallback = Callable[[int, int, np.ndarray], None]
2023-02-26 05:49:39 +00:00
2023-02-26 20:15:30 +00:00
2023-02-26 05:49:39 +00:00
class WorkerContext:
    """
    State held by a worker while it runs jobs.

    Keeps the current job name and type, the flags shared with other
    processes (cancel, idle, active pid), the pending and progress queues,
    and the step/stage/tile progress counters that are published on the
    progress queue.
    """

    cancel: "Value[bool]"
    job: Optional[str]
    job_type: Optional[str]
    name: str
    device: DeviceParams
    logs: "Queue[str]"
    pending: "Queue[JobCommand]"
    active_pid: "Value[int]"
    progress: "Queue[ProgressCommand]"
    last_progress: Optional[ProgressCommand]
    idle: "Value[bool]"
    timeout: float
    retries: int
    initial_retries: int
    callback: Optional[Any]

    # progress state
    steps: Progress
    stages: Progress
    tiles: Progress

    def __init__(
        self,
        name: str,
        device: DeviceParams,
        cancel: "Value[bool]",
        logs: "Queue[str]",
        pending: "Queue[JobCommand]",
        progress: "Queue[ProgressCommand]",
        active_pid: "Value[int]",
        idle: "Value[bool]",
        retries: int,
        timeout: float,
    ):
        """
        Store the shared flags and queues and reset all per-job state.

        `retries` is kept twice: `initial_retries` is the value restored by
        `start()`, `retries` is the working copy.
        """
        self.job = None
        self.job_type = None
        self.name = name
        self.device = device
        self.cancel = cancel
        self.progress = progress
        self.logs = logs
        self.pending = pending
        self.active_pid = active_pid
        self.last_progress = None
        self.idle = idle
        self.initial_retries = retries
        self.retries = retries
        self.timeout = timeout
        self.callback = None
        self.steps = Progress(0, 0)
        self.stages = Progress(0, 0)
        self.tiles = Progress(0, 0)

    def start(self, job: JobCommand) -> None:
        """
        Begin tracking `job`: record its name and type, restore the retry
        budget, and clear the cancel and idle flags.
        """
        # set job name and type
        self.job = job.name
        self.job_type = job.job_type

        # reset retries
        self.retries = self.initial_retries

        # clear flags
        self.set_cancel(cancel=False)
        self.set_idle(idle=False)

    def is_active(self) -> bool:
        """
        Return True when the shared active pid equals this process's pid.
        """
        return self.get_active() == getpid()

    def is_cancelled(self) -> bool:
        """
        Return the current value of the shared cancel flag.
        """
        return self.cancel.value

    def is_idle(self) -> bool:
        """
        Return the current value of the shared idle flag.
        """
        return self.idle.value

    def get_active(self) -> int:
        """
        Read the shared active pid while holding its lock.
        """
        with self.active_pid.get_lock():
            return self.active_pid.value

    def get_device(self) -> DeviceParams:
        """
        Get the device assigned to this job.
        """
        return self.device

    def get_progress(self) -> Progress:
        """
        Return the step counter; alias for `get_last_steps()`.
        """
        return self.get_last_steps()

    def get_last_steps(self) -> Progress:
        """
        Return the most recently stored step counter.
        """
        return self.steps

    def get_last_stages(self) -> Progress:
        """
        Return the most recently stored stage counter.
        """
        return self.stages

    def get_last_tiles(self) -> Progress:
        """
        Return the most recently stored tile counter.
        """
        return self.tiles

    def get_progress_callback(self, reset: bool = False) -> ProgressCallback:
        """
        Return a callback that forwards pipeline steps to `set_progress`.

        The callback is cached on the context. The cached one is returned
        unless `reset` is true or none has been created yet, in which case a
        new one is built with `ChainProgress.from_progress` and cached.
        """
        # imported here rather than at module level
        # NOTE(review): presumably to avoid a circular import - confirm
        from ..chain.pipeline import ChainProgress

        if not reset and self.callback is not None:
            return self.callback

        def on_progress(step: int, timestep: int, latents: Any):
            # only the step count is reported; timestep and latents are unused
            self.set_progress(
                step,
            )

        self.callback = ChainProgress.from_progress(on_progress)
        return self.callback

    def set_cancel(self, cancel: bool = True) -> None:
        """
        Write the shared cancel flag while holding its lock.
        """
        with self.cancel.get_lock():
            self.cancel.value = cancel

    def set_idle(self, idle: bool = True) -> None:
        """
        Write the shared idle flag while holding its lock.
        """
        with self.idle.get_lock():
            self.idle.value = idle

    def set_progress(
        self,
        steps: int,
        stages: Optional[int] = None,
        tiles: Optional[int] = None,
    ) -> None:
        """
        Update the progress counters and publish a RUNNING progress command.

        `steps` is always applied; `stages` and `tiles` are applied only when
        they are not None. The command is put on the progress queue without
        blocking and is also kept in `last_progress`.

        Raises:
            RuntimeError: if there is no current job.
            CancelledException: if the cancel flag is set.
        """
        if self.job is None:
            raise RuntimeError("no job on which to set progress")

        if self.is_cancelled():
            raise CancelledException("job has been cancelled")

        # update current progress counters
        self.steps = self.steps.update(steps)

        if stages is not None:
            self.stages = self.stages.update(stages)

        if tiles is not None:
            self.tiles = self.tiles.update(tiles)

        # TODO: result should really be part of context at this point
        result = None
        if self.callback is not None:
            result = self.callback.result

        # send progress to worker pool
        logger.debug("setting progress for job %s to %s", self.job, steps)
        self.last_progress = ProgressCommand(
            self.job,
            self.job_type,
            self.device.device,
            JobStatus.RUNNING,
            steps=self.steps,
            stages=self.stages,
            tiles=self.tiles,
            result=result,
        )
        self.progress.put(
            self.last_progress,
            block=False,
        )

    def set_steps(self, current: int, total: int = 0) -> None:
        """
        Set the step counter.

        A positive `total` replaces the counter with `Progress(current,
        total)`; otherwise only the current value is changed through
        `Progress.update`.
        """
        if total > 0:
            self.steps = Progress(current, total)
        else:
            self.steps = self.steps.update(current)

    def set_stages(self, current: int, total: int = 0) -> None:
        """
        Set the stage counter.

        A positive `total` replaces the counter with `Progress(current,
        total)`; otherwise only the current value is changed through
        `Progress.update`.
        """
        if total > 0:
            self.stages = Progress(current, total)
        else:
            self.stages = self.stages.update(current)

    def set_tiles(self, current: int, total: int = 0) -> None:
        """
        Set the tile counter.

        A positive `total` replaces the counter with `Progress(current,
        total)`; otherwise only the current value is changed through
        `Progress.update`.
        """
        if total > 0:
            self.tiles = Progress(current, total)
        else:
            self.tiles = self.tiles.update(current)

    def set_totals(self, steps: int, stages: int = 0, tiles: int = 0) -> None:
        """
        Reset all three counters to zero progress with the given totals.
        """
        # replace each counter as a whole, the same way for all three, so the
        # current count is reset and `total` stays a plain number
        self.steps = Progress(0, steps)
        self.stages = Progress(0, stages)
        self.tiles = Progress(0, tiles)

    def finish(self) -> None:
        """
        Publish a SUCCESS progress command for the current job.

        With no current job this only logs a warning. Otherwise the command
        carries the current counters and the callback's result, if a callback
        exists, and is put on the progress queue without blocking.
        """
        if self.job is None:
            logger.warning("setting finished without an active job")
            return

        logger.debug("setting finished for job %s", self.job)

        result = None
        if self.callback is not None:
            result = self.callback.result

        self.last_progress = ProgressCommand(
            self.job,
            self.job_type,
            self.device.device,
            JobStatus.SUCCESS,
            steps=self.steps,
            stages=self.stages,
            tiles=self.tiles,
            result=result,
        )
        self.progress.put(
            self.last_progress,
            block=False,
        )

    def fail(self, reason: Optional[str] = None) -> None:
        """
        Publish a FAILED progress command for the current job.

        With no current job this only logs a warning. Any exception raised
        while building or queueing the command is logged and swallowed, so
        reporting a failure never raises.
        """
        if self.job is None:
            logger.warning("setting failure without an active job")
            return

        logger.warning("setting failure for job %s", self.job)
        try:
            self.last_progress = ProgressCommand(
                self.job,
                self.job_type,
                self.device.device,
                JobStatus.FAILED,
                steps=self.steps,
                stages=self.stages,
                tiles=self.tiles,
                reason=reason,
                # TODO: should this include partial results?
            )
            self.progress.put(
                self.last_progress,
                block=False,
            )
        except Exception:
            logger.exception("error setting failure on job %s", self.job)