apply lint

parent 79115e5f23
commit 1d9de2c45b

@@ -2,7 +2,6 @@ from logging import getLogger
 from os import path
 from typing import Any, List, Optional, Tuple
 
 import numpy as np
 from onnx import load_model
 from transformers import CLIPTokenizer
-

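Every change in this commit is mechanical: isort regroups imports (stdlib, then third-party, then local) and black re-wraps statements past its 88-column limit. A minimal sketch of reproducing the same formatting from Python, assuming black and isort are installed; the helper name is illustrative:

    import black
    import isort

    def apply_lint(source: str) -> str:
        # Regroup and alphabetize imports; the "black" profile keeps
        # isort's wrapping style compatible with black's.
        source = isort.code(source, profile="black")
        # Re-wrap long lines at black's default 88 columns.
        return black.format_str(source, mode=black.Mode())

    print(apply_lint("from typing import Union\nimport numpy as np\n"))
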
@@ -1,12 +1,12 @@
-from diffusers import OnnxRuntimeModel
 from logging import getLogger
 from typing import List, Optional
 
+import numpy as np
+from diffusers import OnnxRuntimeModel
+
 from ...server import ServerContext
 from .vae import set_vae_dtype
 
-import numpy as np
-
 logger = getLogger(__name__)

@@ -69,4 +69,3 @@ class UNetWrapper(object):
         )
         self.prompt_embeds = prompt_embeds
         self.prompt_index = 0
-

@@ -1,13 +1,13 @@
-from typing import Union
-from diffusers.models.autoencoder_kl import AutoencoderKLOutput
-from diffusers.models.vae import DiagonalGaussianDistribution, DecoderOutput
 from logging import getLogger
+from typing import Union
 
 import numpy as np
 import torch
+from diffusers import OnnxRuntimeModel
+from diffusers.models.autoencoder_kl import AutoencoderKLOutput
+from diffusers.models.vae import DecoderOutput, DiagonalGaussianDistribution
 
 from ...server import ServerContext
-from diffusers import OnnxRuntimeModel
 
 logger = getLogger(__name__)

@@ -37,7 +37,11 @@ class VAEWrapper(object):
     def __call__(self, latent_sample=None, **kwargs):
         global timestep_dtype
 
-        logger.trace("VAE %s parameter types: %s", ("decoder" if self.decoder else "encoder"), latent_sample.dtype)
+        logger.trace(
+            "VAE %s parameter types: %s",
+            ("decoder" if self.decoder else "encoder"),
+            latent_sample.dtype,
+        )
         if latent_sample.dtype != timestep_dtype:
             logger.info("converting VAE sample dtype")
             latent_sample = latent_sample.astype(timestep_dtype)

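Context for the hunk above: ONNX Runtime validates input element types against the exported graph and raises instead of casting silently, so the wrapper converts eagerly. A toy sketch of the same check, with the expected dtype assumed:

    import numpy as np

    # Assumed for illustration: the dtype the exported VAE graph expects.
    timestep_dtype = np.float16

    latent_sample = np.zeros((1, 4, 64, 64), dtype=np.float32)
    if latent_sample.dtype != timestep_dtype:
        # Cast up front; a mismatched dtype would fail inside the ONNX session.
        latent_sample = latent_sample.astype(timestep_dtype)
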
@@ -52,16 +56,22 @@ class VAEWrapper(object):
     def blend_v(self, a, b, blend_extent):
         for y in range(min(a.shape[2], b.shape[2], blend_extent)):
-            b[:, :, y, :] = a[:, :, -blend_extent + y, :] * (1 - y / blend_extent) + b[:, :, y, :] * (y / blend_extent)
+            b[:, :, y, :] = a[:, :, -blend_extent + y, :] * (1 - y / blend_extent) + b[
+                :, :, y, :
+            ] * (y / blend_extent)
         return b
 
     def blend_h(self, a, b, blend_extent):
         for x in range(min(a.shape[3], b.shape[3], blend_extent)):
-            b[:, :, :, x] = a[:, :, :, -blend_extent + x] * (1 - x / blend_extent) + b[:, :, :, x] * (x / blend_extent)
+            b[:, :, :, x] = a[:, :, :, -blend_extent + x] * (1 - x / blend_extent) + b[
+                :, :, :, x
+            ] * (x / blend_extent)
         return b
 
     @torch.no_grad()
-    def tiled_encode(self, x: torch.FloatTensor, return_dict: bool = True) -> AutoencoderKLOutput:
+    def tiled_encode(
+        self, x: torch.FloatTensor, return_dict: bool = True
+    ) -> AutoencoderKLOutput:
         r"""Encode a batch of images using a tiled encoder.
         Args:
         When this option is enabled, the VAE will split the input tensor into tiles to compute encoding in several

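Only the wrapping of blend_v and blend_h changed, but the technique is worth noting: overlapping rows (or columns) of adjacent tiles are linearly crossfaded so the tile seams disappear. A self-contained sketch of the vertical blend with toy tensors:

    import numpy as np

    def blend_v(a, b, blend_extent):
        # Crossfade the bottom rows of tile `a` into the top rows of tile `b`:
        # the weight moves linearly from all-`a` at y=0 toward all-`b`.
        for y in range(min(a.shape[2], b.shape[2], blend_extent)):
            b[:, :, y, :] = a[:, :, -blend_extent + y, :] * (1 - y / blend_extent) + b[
                :, :, y, :
            ] * (y / blend_extent)
        return b

    a = np.zeros((1, 1, 4, 4))           # top tile
    b = np.ones((1, 1, 4, 4))            # bottom tile
    print(blend_v(a, b, 2)[0, 0, :, 0])  # [0.  0.5 1.  1. ]
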
@@ -84,7 +94,12 @@ class VAEWrapper(object):
         for i in range(0, x.shape[2], overlap_size):
             row = []
             for j in range(0, x.shape[3], overlap_size):
-                tile = x[:, :, i : i + self.tile_sample_min_size, j : j + self.tile_sample_min_size]
+                tile = x[
+                    :,
+                    :,
+                    i : i + self.tile_sample_min_size,
+                    j : j + self.tile_sample_min_size,
+                ]
                 tile = torch.from_numpy(self.wrapped(latent_sample=tile.numpy())[0])
                 row.append(tile)
             rows.append(row)

@@ -111,7 +126,9 @@ class VAEWrapper(object):
         return AutoencoderKLOutput(latent_dist=posterior)
 
     @torch.no_grad()
-    def tiled_decode(self, z: torch.FloatTensor, return_dict: bool = True) -> Union[DecoderOutput, torch.FloatTensor]:
+    def tiled_decode(
+        self, z: torch.FloatTensor, return_dict: bool = True
+    ) -> Union[DecoderOutput, torch.FloatTensor]:
         r"""Decode a batch of images using a tiled decoder.
         Args:
         When this option is enabled, the VAE will split the input tensor into tiles to compute decoding in several

@@ -136,7 +153,12 @@ class VAEWrapper(object):
         for i in range(0, z.shape[2], overlap_size):
             row = []
             for j in range(0, z.shape[3], overlap_size):
-                tile = z[:, :, i : i + self.tile_latent_min_size, j : j + self.tile_latent_min_size]
+                tile = z[
+                    :,
+                    :,
+                    i : i + self.tile_latent_min_size,
+                    j : j + self.tile_latent_min_size,
+                ]
                 decoded = torch.from_numpy(self.wrapped(latent_sample=tile.numpy())[0])
                 row.append(decoded)
             rows.append(row)

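Both tiled loops advance by overlap_size rather than the full tile size, so neighbouring tiles share a margin that blend_v/blend_h can crossfade before the final crop. A toy walkthrough of the index math, with the sizes assumed:

    # Assumed toy values; the wrapper derives them from tile_latent_min_size
    # and a tile overlap factor of 0.25.
    tile_size = 64
    overlap_size = int(tile_size * (1 - 0.25))  # stride 48, so 16 px shared

    height = 160
    for i in range(0, height, overlap_size):
        # Each tile covers [i, i + tile_size), clipped at the image edge.
        print(i, min(i + tile_size, height))
    # -> (0, 64), (48, 112), (96, 160), (144, 160)
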
@@ -160,4 +182,4 @@ class VAEWrapper(object):
         if not return_dict:
             return (dec,)
 
-        return DecoderOutput(sample=dec)
+        return DecoderOutput(sample=dec)

@@ -15,18 +15,16 @@
 import inspect
 from typing import Callable, List, Optional, Union
 
-import PIL
 import numpy as np
+import PIL
 import torch
-from transformers import CLIPImageProcessor, CLIPTokenizer
-
 from diffusers.configuration_utils import FrozenDict
-from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
-from diffusers.utils import deprecate, logging, PIL_INTERPOLATION
-from diffusers.pipelines.onnx_utils import ORT_TO_NP_TYPE, OnnxRuntimeModel
 from diffusers.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.onnx_utils import ORT_TO_NP_TYPE, OnnxRuntimeModel
 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
+from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
+from diffusers.utils import PIL_INTERPOLATION, deprecate, logging
+from transformers import CLIPImageProcessor, CLIPTokenizer
 
 logger = logging.get_logger(__name__)

@@ -41,7 +39,10 @@ def preprocess(image):
         w, h = image[0].size
         w, h = (x - x % 64 for x in (w, h))  # resize to integer multiple of 64
 
-        image = [np.array(i.resize((w, h), resample=PIL_INTERPOLATION["lanczos"]))[None, :] for i in image]
+        image = [
+            np.array(i.resize((w, h), resample=PIL_INTERPOLATION["lanczos"]))[None, :]
+            for i in image
+        ]
         image = np.concatenate(image, axis=0)
         image = np.array(image).astype(np.float32) / 255.0
         image = image.transpose(0, 3, 1, 2)

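For reference, the list comprehension being re-wrapped is part of a preprocess step that snaps sizes down to multiples of 64, rescales to [0, 1], and reorders to NCHW. A worked example with a single RGB image (Pillow assumed available):

    import numpy as np
    import PIL.Image

    image = PIL.Image.new("RGB", (515, 390))       # arbitrary input size
    w, h = (x - x % 64 for x in image.size)        # -> 512, 384
    arr = np.array(image.resize((w, h)))[None, :]  # (1, 384, 512, 3) uint8
    arr = arr.astype(np.float32) / 255.0           # scale to [0, 1]
    arr = arr.transpose(0, 3, 1, 2)                # NHWC -> NCHW
    print(arr.shape)                               # (1, 3, 384, 512)
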
@@ -78,7 +79,10 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
     ):
         super().__init__()
 
-        if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1:
+        if (
+            hasattr(scheduler.config, "steps_offset")
+            and scheduler.config.steps_offset != 1
+        ):
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`"
                 f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure "

@@ -87,12 +91,17 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
                 " it would be very nice if you could open a Pull request for the `scheduler/scheduler_config.json`"
                 " file"
             )
-            deprecate("steps_offset!=1", "1.0.0", deprecation_message, standard_warn=False)
+            deprecate(
+                "steps_offset!=1", "1.0.0", deprecation_message, standard_warn=False
+            )
             new_config = dict(scheduler.config)
             new_config["steps_offset"] = 1
             scheduler._internal_dict = FrozenDict(new_config)
 
-        if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True:
+        if (
+            hasattr(scheduler.config, "clip_sample")
+            and scheduler.config.clip_sample is True
+        ):
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`."
                 " `clip_sample` should be set to False in the configuration file. Please make sure to update the"

@@ -100,7 +109,9 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
                 " future versions. If you have downloaded this checkpoint from the Hugging Face Hub, it would be very"
                 " nice if you could open a Pull request for the `scheduler/scheduler_config.json` file"
             )
-            deprecate("clip_sample not set", "1.0.0", deprecation_message, standard_warn=False)
+            deprecate(
+                "clip_sample not set", "1.0.0", deprecation_message, standard_warn=False
+            )
             new_config = dict(scheduler.config)
             new_config["clip_sample"] = False
             scheduler._internal_dict = FrozenDict(new_config)

@@ -180,7 +191,9 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
                 return_tensors="np",
             )
             text_input_ids = text_inputs.input_ids
-            untruncated_ids = self.tokenizer(prompt, padding="max_length", return_tensors="np").input_ids
+            untruncated_ids = self.tokenizer(
+                prompt, padding="max_length", return_tensors="np"
+            ).input_ids
 
             if not np.array_equal(text_input_ids, untruncated_ids):
                 removed_text = self.tokenizer.batch_decode(

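The untruncated_ids comparison works because text_input_ids was produced with truncation=True while this second pass is not truncated: if the prompt fits within model_max_length, the two arrays match; if not, the difference reveals the dropped tail. A sketch, assuming the CLIP tokenizer files are cached locally or reachable on the Hub (the checkpoint name is illustrative):

    import numpy as np
    from transformers import CLIPTokenizer

    tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")

    prompt = "a photo of a cat " * 40  # far more than 77 tokens
    truncated = tokenizer(
        prompt,
        padding="max_length",
        max_length=tokenizer.model_max_length,
        truncation=True,
        return_tensors="np",
    ).input_ids
    untruncated = tokenizer(prompt, padding="max_length", return_tensors="np").input_ids

    if not np.array_equal(truncated, untruncated):
        removed = tokenizer.batch_decode(
            untruncated[:, tokenizer.model_max_length - 1 : -1]
        )
        print("dropped from prompt:", removed)
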
@@ -191,7 +204,9 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
                     f" {self.tokenizer.model_max_length} tokens: {removed_text}"
                 )
 
-            prompt_embeds = self.text_encoder(input_ids=text_input_ids.astype(np.int32))[0]
+            prompt_embeds = self.text_encoder(
+                input_ids=text_input_ids.astype(np.int32)
+            )[0]
 
         prompt_embeds = np.repeat(prompt_embeds, num_images_per_prompt, axis=0)

@@ -224,10 +239,14 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
                 truncation=True,
                 return_tensors="np",
             )
-            negative_prompt_embeds = self.text_encoder(input_ids=uncond_input.input_ids.astype(np.int32))[0]
+            negative_prompt_embeds = self.text_encoder(
+                input_ids=uncond_input.input_ids.astype(np.int32)
+            )[0]
 
         if do_classifier_free_guidance:
-            negative_prompt_embeds = np.repeat(negative_prompt_embeds, num_images_per_prompt, axis=0)
+            negative_prompt_embeds = np.repeat(
+                negative_prompt_embeds, num_images_per_prompt, axis=0
+            )
 
             # For classifier free guidance, we need to do two forward passes.
             # Here we concatenate the unconditional and text embeddings into a single batch

@@ -247,10 +266,13 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
         negative_prompt_embeds: Optional[np.ndarray] = None,
     ):
         if height % 8 != 0 or width % 8 != 0:
-            raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
+            raise ValueError(
+                f"`height` and `width` have to be divisible by 8 but are {height} and {width}."
+            )
 
         if (callback_steps is None) or (
-            callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0)
+            callback_steps is not None
+            and (not isinstance(callback_steps, int) or callback_steps <= 0)
         ):
             raise ValueError(
                 f"`callback_steps` has to be a positive integer but is {callback_steps} of type"

@@ -266,8 +288,12 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
             raise ValueError(
                 "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
             )
-        elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
-            raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
+        elif prompt is not None and (
+            not isinstance(prompt, str) and not isinstance(prompt, list)
+        ):
+            raise ValueError(
+                f"`prompt` has to be of type `str` or `list` but is {type(prompt)}"
+            )
 
         if negative_prompt is not None and negative_prompt_embeds is not None:
             raise ValueError(

@@ -381,7 +407,13 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
 
         # check inputs. Raise error if not correct
         self.check_inputs(
-            prompt, height, width, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds
+            prompt,
+            height,
+            width,
+            callback_steps,
+            negative_prompt,
+            prompt_embeds,
+            negative_prompt_embeds,
         )
 
         # define call parameters

@@ -415,7 +447,9 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
         if latents is None:
             latents = generator.randn(*latents_shape).astype(latents_dtype)
         elif latents.shape != latents_shape:
-            raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}")
+            raise ValueError(
+                f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}"
+            )
 
         # set timesteps
         self.scheduler.set_timesteps(num_inference_steps)

@@ -426,13 +460,20 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
         # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
         # and should be between [0, 1]
-        accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
+        accepts_eta = "eta" in set(
+            inspect.signature(self.scheduler.step).parameters.keys()
+        )
         extra_step_kwargs = {}
         if accepts_eta:
             extra_step_kwargs["eta"] = eta
 
         timestep_dtype = next(
-            (input.type for input in self.unet.model.get_inputs() if input.name == "timestep"), "tensor(float)"
+            (
+                input.type
+                for input in self.unet.model.get_inputs()
+                if input.name == "timestep"
+            ),
+            "tensor(float)",
         )
         timestep_dtype = ORT_TO_NP_TYPE[timestep_dtype]

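Two introspection idioms are re-wrapped in this hunk: probing whether the scheduler's step() accepts an eta argument (DDIM's does, most others do not), and reading the UNet's expected timestep dtype from the ONNX graph inputs. A sketch of both; the session part is commented out because it needs a model file on disk, and the path is assumed:

    import inspect

    from diffusers import DDIMScheduler

    scheduler = DDIMScheduler()
    accepts_eta = "eta" in set(inspect.signature(scheduler.step).parameters.keys())
    print(accepts_eta)  # True for DDIM

    # With onnxruntime, the equivalent dtype lookup reads the graph inputs:
    # session = onnxruntime.InferenceSession("unet/model.onnx")
    # dtype_str = next(
    #     (i.type for i in session.get_inputs() if i.name == "timestep"),
    #     "tensor(float)",
    # )  # e.g. "tensor(float16)", mapped to a numpy dtype via ORT_TO_NP_TYPE
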
@@ -450,23 +491,38 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
                 latents_for_view = latents[:, :, h_start:h_end, w_start:w_end]
 
                 # expand the latents if we are doing classifier free guidance
-                latent_model_input = np.concatenate([latents_for_view] * 2) if do_classifier_free_guidance else latents_for_view
-                latent_model_input = self.scheduler.scale_model_input(torch.from_numpy(latent_model_input), t)
+                latent_model_input = (
+                    np.concatenate([latents_for_view] * 2)
+                    if do_classifier_free_guidance
+                    else latents_for_view
+                )
+                latent_model_input = self.scheduler.scale_model_input(
+                    torch.from_numpy(latent_model_input), t
+                )
                 latent_model_input = latent_model_input.cpu().numpy()
 
                 # predict the noise residual
                 timestep = np.array([t], dtype=timestep_dtype)
-                noise_pred = self.unet(sample=latent_model_input, timestep=timestep, encoder_hidden_states=prompt_embeds)
+                noise_pred = self.unet(
+                    sample=latent_model_input,
+                    timestep=timestep,
+                    encoder_hidden_states=prompt_embeds,
+                )
                 noise_pred = noise_pred[0]
 
                 # perform guidance
                 if do_classifier_free_guidance:
                     noise_pred_uncond, noise_pred_text = np.split(noise_pred, 2)
-                    noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+                    noise_pred = noise_pred_uncond + guidance_scale * (
+                        noise_pred_text - noise_pred_uncond
+                    )
 
                 # compute the previous noisy sample x_t -> x_t-1
                 scheduler_output = self.scheduler.step(
-                    torch.from_numpy(noise_pred), t, torch.from_numpy(latents_for_view), **extra_step_kwargs
+                    torch.from_numpy(noise_pred),
+                    t,
+                    torch.from_numpy(latents_for_view),
+                    **extra_step_kwargs,
                 )
                 latents_view_denoised = scheduler_output.prev_sample.numpy()

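The guidance line that black re-wrapped is standard classifier-free guidance: the doubled batch yields an unconditional and a text-conditioned noise estimate, and the result extrapolates beyond the unconditional one by guidance_scale. A toy numpy check of the arithmetic:

    import numpy as np

    guidance_scale = 7.5

    # One UNet pass over the doubled batch returns both halves at once.
    noise_pred = np.stack([np.full(4, 0.1), np.full(4, 0.3)])  # toy values
    noise_pred_uncond, noise_pred_text = np.split(noise_pred, 2)

    guided = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
    print(guided)  # 0.1 + 7.5 * (0.3 - 0.1) = 1.6 everywhere
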
@@ -484,7 +540,10 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
         # image = self.vae_decoder(latent_sample=latents)[0]
         # it seems likes there is a strange result for using half-precision vae decoder if batchsize>1
         image = np.concatenate(
-            [self.vae_decoder(latent_sample=latents[i : i + 1])[0] for i in range(latents.shape[0])]
+            [
+                self.vae_decoder(latent_sample=latents[i : i + 1])[0]
+                for i in range(latents.shape[0])
+            ]
         )
 
         image = np.clip(image / 2 + 0.5, 0, 1)

@@ -512,7 +571,9 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
         if not return_dict:
             return (image, has_nsfw_concept)
 
-        return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
+        return StableDiffusionPipelineOutput(
+            images=image, nsfw_content_detected=has_nsfw_concept
+        )
 
     def img2img(
         self,

@@ -598,7 +659,13 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
 
         # check inputs. Raise error if not correct
         self.check_inputs(
-            prompt, height, width, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds
+            prompt,
+            height,
+            width,
+            callback_steps,
+            negative_prompt,
+            prompt_embeds,
+            negative_prompt_embeds,
         )
 
         # define call parameters

@@ -651,7 +718,9 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
 
         noise = generator.randn(*latents.shape).astype(latents_dtype)
         latents = self.scheduler.add_noise(
-            torch.from_numpy(latents), torch.from_numpy(noise), torch.from_numpy(timesteps)
+            torch.from_numpy(latents),
+            torch.from_numpy(noise),
+            torch.from_numpy(timesteps),
         )
         latents = latents.numpy()

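For context, scheduler.add_noise applies the closed-form forward diffusion q(x_t | x_0): the latents are mixed with Gaussian noise according to the cumulative alpha at the chosen timestep. A numpy sketch of the formula with an assumed linear beta schedule; real schedulers expose alphas_cumprod themselves:

    import numpy as np

    betas = np.linspace(1e-4, 0.02, 1000, dtype=np.float32)
    alphas_cumprod = np.cumprod(1.0 - betas)

    def add_noise(x0, noise, t):
        # x_t = sqrt(alpha_bar_t) * x0 + sqrt(1 - alpha_bar_t) * noise
        return (
            np.sqrt(alphas_cumprod[t]) * x0
            + np.sqrt(1.0 - alphas_cumprod[t]) * noise
        )

    x0 = np.zeros((1, 4, 64, 64), dtype=np.float32)
    noise = np.random.randn(*x0.shape).astype(np.float32)
    print(add_noise(x0, noise, t=500).std())  # mostly noise at mid-schedule
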
@@ -659,13 +728,20 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
         # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
         # and should be between [0, 1]
-        accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
+        accepts_eta = "eta" in set(
+            inspect.signature(self.scheduler.step).parameters.keys()
+        )
         extra_step_kwargs = {}
         if accepts_eta:
             extra_step_kwargs["eta"] = eta
 
         timestep_dtype = next(
-            (input.type for input in self.unet.model.get_inputs() if input.name == "timestep"), "tensor(float)"
+            (
+                input.type
+                for input in self.unet.model.get_inputs()
+                if input.name == "timestep"
+            ),
+            "tensor(float)",
         )
         timestep_dtype = ORT_TO_NP_TYPE[timestep_dtype]

@@ -683,23 +759,38 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
                 latents_for_view = latents[:, :, h_start:h_end, w_start:w_end]
 
                 # expand the latents if we are doing classifier free guidance
-                latent_model_input = np.concatenate([latents_for_view] * 2) if do_classifier_free_guidance else latents_for_view
-                latent_model_input = self.scheduler.scale_model_input(torch.from_numpy(latent_model_input), t)
+                latent_model_input = (
+                    np.concatenate([latents_for_view] * 2)
+                    if do_classifier_free_guidance
+                    else latents_for_view
+                )
+                latent_model_input = self.scheduler.scale_model_input(
+                    torch.from_numpy(latent_model_input), t
+                )
                 latent_model_input = latent_model_input.cpu().numpy()
 
                 # predict the noise residual
                 timestep = np.array([t], dtype=timestep_dtype)
-                noise_pred = self.unet(sample=latent_model_input, timestep=timestep, encoder_hidden_states=prompt_embeds)
+                noise_pred = self.unet(
+                    sample=latent_model_input,
+                    timestep=timestep,
+                    encoder_hidden_states=prompt_embeds,
+                )
                 noise_pred = noise_pred[0]
 
                 # perform guidance
                 if do_classifier_free_guidance:
                     noise_pred_uncond, noise_pred_text = np.split(noise_pred, 2)
-                    noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+                    noise_pred = noise_pred_uncond + guidance_scale * (
+                        noise_pred_text - noise_pred_uncond
+                    )
 
                 # compute the previous noisy sample x_t -> x_t-1
                 scheduler_output = self.scheduler.step(
-                    torch.from_numpy(noise_pred), t, torch.from_numpy(latents_for_view), **extra_step_kwargs
+                    torch.from_numpy(noise_pred),
+                    t,
+                    torch.from_numpy(latents_for_view),
+                    **extra_step_kwargs,
                 )
                 latents_view_denoised = scheduler_output.prev_sample.numpy()

@@ -717,7 +808,10 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
         # image = self.vae_decoder(latent_sample=latents)[0]
         # it seems likes there is a strange result for using half-precision vae decoder if batchsize>1
         image = np.concatenate(
-            [self.vae_decoder(latent_sample=latents[i : i + 1])[0] for i in range(latents.shape[0])]
+            [
+                self.vae_decoder(latent_sample=latents[i : i + 1])[0]
+                for i in range(latents.shape[0])
+            ]
         )
 
         image = np.clip(image / 2 + 0.5, 0, 1)

@@ -745,14 +839,18 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline):
         if not return_dict:
             return (image, has_nsfw_concept)
 
-        return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
+        return StableDiffusionPipelineOutput(
+            images=image, nsfw_content_detected=has_nsfw_concept
+        )
 
     def __call__(
         self,
         *args,
         **kwargs,
     ):
-        if len(args) > 0 and (isinstance(args[0], np.ndarray) or isinstance(args[0], PIL.Image.Image)):
+        if len(args) > 0 and (
+            isinstance(args[0], np.ndarray) or isinstance(args[0], PIL.Image.Image)
+        ):
             logger.debug("running img2img panorama pipeline")
             return self.img2img(*args, **kwargs)
         else:

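The re-wrapped type check gives the pipeline a single entry point: if the first positional argument is an image (ndarray or PIL), the call is routed to img2img, otherwise it falls through to the text-to-image path. A usage sketch; the loading step and the exact argument order are assumptions:

    import PIL.Image

    # Assumption: pipeline loaded elsewhere, e.g.
    # pipe = OnnxStableDiffusionPanoramaPipeline.from_pretrained(...)
    init_image = PIL.Image.open("input.png")

    # First positional argument is an image -> img2img branch.
    result = pipe(init_image, prompt="a wide mountain panorama")

    # No leading image -> text-to-image branch.
    result = pipe(prompt="a wide mountain panorama")
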
@@ -13,7 +13,13 @@ from ..params import (
     Size,
     UpscaleParams,
 )
-from ..utils import get_and_clamp_float, get_and_clamp_int, get_boolean, get_from_list, get_not_empty
+from ..utils import (
+    get_and_clamp_float,
+    get_and_clamp_int,
+    get_boolean,
+    get_from_list,
+    get_not_empty,
+)
 from .context import ServerContext
 from .load import (
     get_available_platforms,