fix(api): add error handling for optimizations
This commit is contained in:
parent
ab6462d095
commit
118695d68c
|
@@ -94,24 +94,41 @@ def optimize_pipeline(
|
||||||
) -> None:
|
) -> None:
|
||||||
if "attention-slicing" in server.optimizations:
|
if "attention-slicing" in server.optimizations:
|
||||||
logger.debug("enabling attention slicing on SD pipeline")
|
logger.debug("enabling attention slicing on SD pipeline")
|
||||||
|
try:
|
||||||
pipe.enable_attention_slicing()
|
pipe.enable_attention_slicing()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("error enabling attention slicing: %s", e)
|
||||||
|
|
||||||
if "vae-slicing" in server.optimizations:
|
if "vae-slicing" in server.optimizations:
|
||||||
logger.debug("enabling VAE slicing on SD pipeline")
|
logger.debug("enabling VAE slicing on SD pipeline")
|
||||||
|
try:
|
||||||
pipe.enable_vae_slicing()
|
pipe.enable_vae_slicing()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("error enabling VAE slicing: %s", e)
|
||||||
|
|
||||||
if "sequential-cpu-offload" in server.optimizations:
|
if "sequential-cpu-offload" in server.optimizations:
|
||||||
logger.debug("enabling sequential CPU offload on SD pipeline")
|
logger.debug("enabling sequential CPU offload on SD pipeline")
|
||||||
|
try:
|
||||||
pipe.enable_sequential_cpu_offload()
|
pipe.enable_sequential_cpu_offload()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("error enabling sequential CPU offload: %s", e)
|
||||||
|
|
||||||
elif "model-cpu-offload" in server.optimizations:
|
elif "model-cpu-offload" in server.optimizations:
|
||||||
# TODO: check for accelerate
|
# TODO: check for accelerate
|
||||||
logger.debug("enabling model CPU offload on SD pipeline")
|
logger.debug("enabling model CPU offload on SD pipeline")
|
||||||
|
try:
|
||||||
pipe.enable_model_cpu_offload()
|
pipe.enable_model_cpu_offload()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("error enabling model CPU offload: %s", e)
|
||||||
|
|
||||||
|
|
||||||
if "memory-efficient-attention" in server.optimizations:
|
if "memory-efficient-attention" in server.optimizations:
|
||||||
# TODO: check for xformers
|
# TODO: check for xformers
|
||||||
logger.debug("enabling memory efficient attention for SD pipeline")
|
logger.debug("enabling memory efficient attention for SD pipeline")
|
||||||
|
try:
|
||||||
pipe.enable_xformers_memory_efficient_attention()
|
pipe.enable_xformers_memory_efficient_attention()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("error enabling memory efficient attention: %s", e)
|
||||||
|
|
||||||
|
|
||||||
def load_pipeline(
|
def load_pipeline(
|
||||||
|
|
Loading…
Reference in New Issue