6 files changed, 40 insertions, 3 deletions
diff --git a/modules/api/api.py b/modules/api/api.py
index eee99bbb..6a456861 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -700,4 +700,4 @@ class Api:
 
     def launch(self, server_name, port):
         self.app.include_router(self.router)
-        uvicorn.run(self.app, host=server_name, port=port)
+        uvicorn.run(self.app, host=server_name, port=port, timeout_keep_alive=0)  # NOTE(review): keep-alive timeout of 0 s - presumably so idle HTTP keep-alive connections are closed right away; confirm against uvicorn settings docs
diff --git a/modules/devices.py b/modules/devices.py
index d8a34a0f..1ed6ffdc 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -1,5 +1,7 @@
 import sys
 import contextlib
+from functools import lru_cache
+
 import torch
 from modules import errors
 
@@ -154,3 +156,19 @@ def test_for_nans(x, where):
     message += " Use --disable-nan-check commandline argument to disable this check."
 
     raise NansException(message)
+
+
+@lru_cache  # function takes no arguments, so after the first successful call the cached result (None) is returned and the body is not re-run
+def first_time_calculation():
+    """
+    just do any calculation with pytorch layers - the first time this is done it allocates about 700MB of memory and
+    spends about 2.7 seconds doing that, at least with NVidia.
+    """
+
+    x = torch.zeros((1, 1)).to(device, dtype)  # `device` / `dtype` are not defined in this hunk - presumably module-level globals of devices.py; verify
+    linear = torch.nn.Linear(1, 1).to(device, dtype)
+    linear(x)  # output is discarded; the forward pass is run only for its one-time initialization cost
+
+    x = torch.zeros((1, 1, 3, 3)).to(device, dtype)
+    conv2d = torch.nn.Conv2d(1, 1, (3, 3)).to(device, dtype)
+    conv2d(x)  # same warm-up, this time through a convolution layer; output discarded
diff --git a/modules/sd_models.py b/modules/sd_models.py
index b1afbaa7..91b3eb11 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -508,6 +508,11 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None):
 
     timer.record("scripts callbacks")
 
+    with devices.autocast(), torch.no_grad():
+        sd_model.cond_stage_model_empty_prompt = sd_model.cond_stage_model([""])
+
+    timer.record("calculate empty prompt")
+
     print(f"Model loaded in {timer.summary()}.")
 
     return sd_model
diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py
index 59982fc9..638e0ac9 100644
--- a/modules/sd_samplers_kdiffusion.py
+++ b/modules/sd_samplers_kdiffusion.py
@@ -125,6 +125,16 @@ class CFGDenoiser(torch.nn.Module):
             x_in = x_in[:-batch_size]
             sigma_in = sigma_in[:-batch_size]
 
+        # TODO add infotext entry
+        if shared.opts.pad_cond_uncond and tensor.shape[1] != uncond.shape[1]:
+            empty = shared.sd_model.cond_stage_model_empty_prompt
+            num_repeats = (tensor.shape[1] - uncond.shape[1]) // empty.shape[1]
+
+            if num_repeats < 0:
+                tensor = torch.cat([tensor, empty.repeat((tensor.shape[0], -num_repeats, 1))], axis=1)
+            elif num_repeats > 0:
+                uncond = torch.cat([uncond, empty.repeat((uncond.shape[0], num_repeats, 1))], axis=1)
+
         if tensor.shape[1] == uncond.shape[1] or skip_uncond:
             if is_edit_model:
                 cond_in = torch.cat([tensor, uncond, uncond])
diff --git a/modules/shared.py b/modules/shared.py
index 3099d1d2..0897f937 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -423,6 +423,7 @@ options_templates.update(options_section(('optimizations', "Optimizations"), {
     "token_merging_ratio": OptionInfo(0.0, "Token merging ratio", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}).link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9256").info("0=disable, higher=faster"),
     "token_merging_ratio_img2img": OptionInfo(0.0, "Token merging ratio for img2img", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}).info("only applies if non-zero and overrides above"),
     "token_merging_ratio_hr": OptionInfo(0.0, "Token merging ratio for high-res pass", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}).info("only applies if non-zero and overrides above"),
+    "pad_cond_uncond": OptionInfo(False, "Pad prompt/negative prompt to be same length").info("improves performance when prompt and negative prompt have different lengths; changes seeds"),
 }))
 
 options_templates.update(options_section(('compatibility', "Compatibility"), {
@@ -487,6 +488,7 @@ options_templates.update(options_section(('ui', "User interface"), {
     "ui_reorder": OptionInfo(", ".join(ui_reorder_categories), "txt2img/img2img UI item order").needs_restart(),
     "hires_fix_show_sampler": OptionInfo(False, "Hires fix: show hires sampler selection").needs_restart(),
     "hires_fix_show_prompts": OptionInfo(False, "Hires fix: show hires prompt and negative prompt").needs_restart(),
+    "disable_token_counters": OptionInfo(False, "Disable prompt token counters").needs_restart(),
 }))
 
 options_templates.update(options_section(('infotext', "Infotext"), {
diff --git a/modules/ui.py b/modules/ui.py
index e62182da..001b9792 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -616,7 +616,8 @@ def create_ui():
                 outputs=[
                     txt2img_prompt,
                     txt_prompt_img
-                ]
+                ],
+                show_progress=False,
             )
 
             enable_hr.change(
@@ -902,7 +903,8 @@ def create_ui():
                 outputs=[
                     img2img_prompt,
                     img2img_prompt_img
-                ]
+                ],
+                show_progress=False,
             )
 
             img2img_args = dict(