diff --git a/api/onnx_web/diffusers/pipelines/panorama.py b/api/onnx_web/diffusers/pipelines/panorama.py index 987a07e9..42f9e721 100644 --- a/api/onnx_web/diffusers/pipelines/panorama.py +++ b/api/onnx_web/diffusers/pipelines/panorama.py @@ -566,6 +566,7 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline): count = np.zeros((latents.shape[0], latents.shape[1], *resize)) value = np.zeros((latents.shape[0], latents.shape[1], *resize)) + # adjust latents latents = expand_latents( latents, generator.randint(np.iinfo(np.int32).max), @@ -974,8 +975,16 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline): # panorama additions views, resize = self.get_views(height, width, self.window, self.stride) - count = np.zeros_like(latents) - value = np.zeros_like(latents) + count = np.zeros((latents.shape[0], latents.shape[1], *resize)) + value = np.zeros((latents.shape[0], latents.shape[1], *resize)) + + # adjust latents + latents = expand_latents( + latents, + generator.randint(np.iinfo(np.int32).max), + Size(resize[1], resize[0]), + sigma=self.scheduler.init_noise_sigma, + ) for i, t in enumerate(self.progress_bar(timesteps)): count.fill(0) @@ -1031,6 +1040,9 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline): if callback is not None and i % callback_steps == 0: callback(i, t, latents) + # remove extra margins + latents = latents[:, :, 0:(height // 8), 0:(width // 8)] + latents = 1 / 0.18215 * latents # image = self.vae_decoder(latent_sample=latents)[0] # it seems likes there is a strange result for using half-precision vae decoder if batchsize>1 @@ -1282,8 +1294,16 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline): # panorama additions views, resize = self.get_views(height, width, self.window, self.stride) - count = np.zeros_like(latents) - value = np.zeros_like(latents) + count = np.zeros((latents.shape[0], latents.shape[1], *resize)) + value = np.zeros((latents.shape[0], latents.shape[1], *resize)) + + # adjust latents + latents = expand_latents( + latents, + generator.randint(np.iinfo(np.int32).max), + Size(resize[1], resize[0]), + sigma=self.scheduler.init_noise_sigma, + ) for i, t in enumerate(self.progress_bar(self.scheduler.timesteps)): count.fill(0) @@ -1346,6 +1366,9 @@ class OnnxStableDiffusionPanoramaPipeline(DiffusionPipeline): if callback is not None and i % callback_steps == 0: callback(i, t, latents) + # remove extra margins + latents = latents[:, :, 0:(height // 8), 0:(width // 8)] + latents = 1 / 0.18215 * latents # image = self.vae_decoder(latent_sample=latents)[0] # it seems likes there is a strange result for using half-precision vae decoder if batchsize>1 diff --git a/api/onnx_web/diffusers/pipelines/panorama_xl.py b/api/onnx_web/diffusers/pipelines/panorama_xl.py index c5fea7d8..9cd87b49 100644 --- a/api/onnx_web/diffusers/pipelines/panorama_xl.py +++ b/api/onnx_web/diffusers/pipelines/panorama_xl.py @@ -810,8 +810,15 @@ class StableDiffusionXLPanoramaPipelineMixin(StableDiffusionXLImg2ImgPipelineMix # 8. Panorama additions views, resize = self.get_views(height, width, self.window, self.stride) - count = np.zeros_like(latents) - value = np.zeros_like(latents) + count = np.zeros((latents.shape[0], latents.shape[1], *resize)) + value = np.zeros((latents.shape[0], latents.shape[1], *resize)) + + latents = expand_latents( + latents, + generator.randint(np.iinfo(np.int32).max), + Size(resize[1], resize[0]), + sigma=self.scheduler.init_noise_sigma, + ) # 8. Denoising loop num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order @@ -881,6 +888,9 @@ class StableDiffusionXLPanoramaPipelineMixin(StableDiffusionXLImg2ImgPipelineMix if callback is not None and i % callback_steps == 0: callback(i, t, latents) + # remove extra margins + latents = latents[:, :, 0:(height // 8), 0:(width // 8)] + if output_type == "latent": image = latents else: