14 files changed, 120 insertions, 39 deletions
diff --git a/modules/cache.py b/modules/cache.py
index a7cd3aeb..ff26a213 100644
--- a/modules/cache.py
+++ b/modules/cache.py
@@ -30,9 +30,12 @@ def dump_cache():
             time.sleep(1)
 
         with cache_lock:
-            with open(cache_filename, "w", encoding="utf8") as file:
+            cache_filename_tmp = cache_filename + "-"
+            with open(cache_filename_tmp, "w", encoding="utf8") as file:
                 json.dump(cache_data, file, indent=4)
 
+            os.replace(cache_filename_tmp, cache_filename)
+
             dump_cache_after = None
             dump_cache_thread = None
 
diff --git a/modules/cmd_args.py b/modules/cmd_args.py
index 9f8e5b30..f0f361bd 100644
--- a/modules/cmd_args.py
+++ b/modules/cmd_args.py
@@ -35,6 +35,7 @@ parser.add_argument("--hypernetwork-dir", type=str, default=os.path.join(models_
 parser.add_argument("--localizations-dir", type=str, default=os.path.join(script_path, 'localizations'), help="localizations directory")
 parser.add_argument("--allow-code", action='store_true', help="allow custom script execution from webui")
 parser.add_argument("--medvram", action='store_true', help="enable stable diffusion model optimizations for sacrificing a little speed for low VRM usage")
+parser.add_argument("--medvram-sdxl", action='store_true', help="enable --medvram optimization just for SDXL models")
 parser.add_argument("--lowvram", action='store_true', help="enable stable diffusion model optimizations for sacrificing a lot of speed for very low VRM usage")
 parser.add_argument("--lowram", action='store_true', help="load stable diffusion checkpoint weights to VRAM instead of RAM")
 parser.add_argument("--always-batch-cond-uncond", action='store_true', help="does not do anything")
diff --git a/modules/errors.py b/modules/errors.py
index 192cd8ff..a56fd30c 100644
--- a/modules/errors.py
+++ b/modules/errors.py
@@ -95,7 +95,7 @@ def check_versions():
 
     expected_torch_version = "2.0.0"
     expected_xformers_version = "0.0.20"
-    expected_gradio_version = "3.39.0"
+    expected_gradio_version = "3.41.0"
 
     if version.parse(torch.__version__) < version.parse(expected_torch_version):
         print_error_explanation(f"""
diff --git a/modules/interrogate.py b/modules/interrogate.py
index a3ae1dd5..3045560d 100644
--- a/modules/interrogate.py
+++ b/modules/interrogate.py
@@ -186,9 +186,8 @@ class InterrogateModels:
         res = ""
         shared.state.begin(job="interrogate")
         try:
-            if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
-                lowvram.send_everything_to_cpu()
-                devices.torch_gc()
+            lowvram.send_everything_to_cpu()
+            devices.torch_gc()
 
             self.load()
 
diff --git a/modules/lowvram.py b/modules/lowvram.py
index 96f52b7b..45701046 100644
--- a/modules/lowvram.py
+++ b/modules/lowvram.py
@@ -1,5 +1,5 @@
 import torch
-from modules import devices
+from modules import devices, shared
 
 module_in_gpu = None
 cpu = torch.device("cpu")
@@ -14,6 +14,31 @@ def send_everything_to_cpu():
     module_in_gpu = None
 
 
+def is_needed(sd_model):
+    """Return a truthy value if lowvram/medvram optimizations should be used for sd_model.
+
+    --lowvram and --medvram apply to every model; --medvram-sdxl applies only to
+    models that have a `conditioner` attribute.
+    """
+    return shared.cmd_opts.lowvram or shared.cmd_opts.medvram or (shared.cmd_opts.medvram_sdxl and hasattr(sd_model, 'conditioner'))
+
+
+def apply(sd_model):
+    """Enable or disable lowvram/medvram optimizations for sd_model, as decided by is_needed().
+
+    Also sets shared.parallel_processing_allowed, which is False whenever the
+    optimizations are enabled.
+    """
+    enable = is_needed(sd_model)
+    shared.parallel_processing_allowed = not enable
+
+    if enable:
+        # second argument is use_medvram: everything except --lowvram uses the medvram variant
+        setup_for_low_vram(sd_model, not shared.cmd_opts.lowvram)
+    else:
+        sd_model.lowvram = False
+
+
 def setup_for_low_vram(sd_model, use_medvram):
     if getattr(sd_model, 'lowvram', False):
         return
@@ -130,4 +155,4 @@ def setup_for_low_vram(sd_model, use_medvram):
 
 
 def is_enabled(sd_model):
-    return getattr(sd_model, 'lowvram', False)
+    return sd_model.lowvram
diff --git a/modules/processing.py b/modules/processing.py
index e60cc92b..7dc931ba 100755..100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -407,12 +407,14 @@ class StableDiffusionProcessing:
         self.main_prompt = self.all_prompts[0]
         self.main_negative_prompt = self.all_negative_prompts[0]
 
-    def cached_params(self, required_prompts, steps, extra_network_data):
+    def cached_params(self, required_prompts, steps, extra_network_data, hires_steps=None, use_old_scheduling=False):
         """Returns parameters that invalidate the cond cache if changed"""
 
         return (
             required_prompts,
             steps,
+            hires_steps,
+            use_old_scheduling,
             opts.CLIP_stop_at_last_layers,
             shared.sd_model.sd_checkpoint_info,
             extra_network_data,
@@ -422,7 +424,7 @@ class StableDiffusionProcessing:
             self.height,
         )
 
-    def get_conds_with_caching(self, function, required_prompts, steps, caches, extra_network_data):
+    def get_conds_with_caching(self, function, required_prompts, steps, caches, extra_network_data, hires_steps=None):
         """
         Returns the result of calling function(shared.sd_model, required_prompts, steps)
         using a cache to store the result if the same arguments have been used before.
@@ -435,7 +437,13 @@ class StableDiffusionProcessing:
         caches is a list with items described above.
         """
 
-        cached_params = self.cached_params(required_prompts, steps, extra_network_data)
+        if shared.opts.use_old_scheduling:
+            old_schedules = prompt_parser.get_learned_conditioning_prompt_schedules(required_prompts, steps, hires_steps, False)
+            new_schedules = prompt_parser.get_learned_conditioning_prompt_schedules(required_prompts, steps, hires_steps, True)
+            if old_schedules != new_schedules:
+                self.extra_generation_params["Old prompt editing timelines"] = True
+
+        cached_params = self.cached_params(required_prompts, steps, extra_network_data, hires_steps, shared.opts.use_old_scheduling)
 
         for cache in caches:
             if cache[0] is not None and cached_params == cache[0]:
@@ -444,7 +452,7 @@ class StableDiffusionProcessing:
         cache = caches[0]
 
         with devices.autocast():
-            cache[1] = function(shared.sd_model, required_prompts, steps)
+            cache[1] = function(shared.sd_model, required_prompts, steps, hires_steps, shared.opts.use_old_scheduling)
 
         cache[0] = cached_params
         return cache[1]
@@ -456,6 +464,8 @@ class StableDiffusionProcessing:
         sampler_config = sd_samplers.find_sampler_config(self.sampler_name)
         total_steps = sampler_config.total_steps(self.steps) if sampler_config else self.steps
         self.step_multiplier = total_steps // self.steps
+        self.firstpass_steps = total_steps
+
         self.uc = self.get_conds_with_caching(prompt_parser.get_learned_conditioning, negative_prompts, total_steps, [self.cached_uc], self.extra_network_data)
         self.c = self.get_conds_with_caching(prompt_parser.get_multicond_learned_conditioning, prompts, total_steps, [self.cached_c], self.extra_network_data)
 
@@ -1292,8 +1302,8 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         steps = self.hr_second_pass_steps or self.steps
         total_steps = sampler_config.total_steps(steps) if sampler_config else steps
 
-        self.hr_uc = self.get_conds_with_caching(prompt_parser.get_learned_conditioning, hr_negative_prompts, total_steps, [self.cached_hr_uc, self.cached_uc], self.hr_extra_network_data)
-        self.hr_c = self.get_conds_with_caching(prompt_parser.get_multicond_learned_conditioning, hr_prompts, total_steps, [self.cached_hr_c, self.cached_c], self.hr_extra_network_data)
+        self.hr_uc = self.get_conds_with_caching(prompt_parser.get_learned_conditioning, hr_negative_prompts, self.firstpass_steps, [self.cached_hr_uc, self.cached_uc], self.hr_extra_network_data, total_steps)
+        self.hr_c = self.get_conds_with_caching(prompt_parser.get_multicond_learned_conditioning, hr_prompts, self.firstpass_steps, [self.cached_hr_c, self.cached_c], self.hr_extra_network_data, total_steps)
 
     def setup_conds(self):
         if self.is_hr_pass:
diff --git a/modules/prompt_parser.py b/modules/prompt_parser.py
index e811ae99..334efeef 100644
--- a/modules/prompt_parser.py
+++ b/modules/prompt_parser.py
@@ -26,7 +26,7 @@ plain: /([^\\\[\]():|]|\\.)+/
 %import common.SIGNED_NUMBER -> NUMBER
 """)
 
-def get_learned_conditioning_prompt_schedules(prompts, steps):
+def get_learned_conditioning_prompt_schedules(prompts, base_steps, hires_steps=None, use_old_scheduling=False):
     """
     >>> g = lambda p: get_learned_conditioning_prompt_schedules([p], 10)[0]
     >>> g("test")
@@ -57,18 +57,39 @@ def get_learned_conditioning_prompt_schedules(prompts, steps):
     [[1, 'female'], [2, 'male'], [3, 'female'], [4, 'male'], [5, 'female'], [6, 'male'], [7, 'female'], [8, 'male'], [9, 'female'], [10, 'male']]
     >>> g("[fe|||]male")
     [[1, 'female'], [2, 'male'], [3, 'male'], [4, 'male'], [5, 'female'], [6, 'male'], [7, 'male'], [8, 'male'], [9, 'female'], [10, 'male']]
+    >>> g = lambda p: get_learned_conditioning_prompt_schedules([p], 10, 10)[0]
+    >>> g("a [b:.5] c")
+    [[10, 'a b c']]
+    >>> g("a [b:1.5] c")
+    [[5, 'a  c'], [10, 'a b c']]
     """
 
+    if hires_steps is None or use_old_scheduling:
+        int_offset = 0
+        flt_offset = 0
+        steps = base_steps
+    else:
+        int_offset = base_steps
+        flt_offset = 1.0
+        steps = hires_steps
+
     def collect_steps(steps, tree):
         res = [steps]
 
         class CollectSteps(lark.Visitor):
             def scheduled(self, tree):
-                tree.children[-2] = float(tree.children[-2])
-                if tree.children[-2] < 1:
-                    tree.children[-2] *= steps
-                tree.children[-2] = min(steps, int(tree.children[-2]))
-                res.append(tree.children[-2])
+                s = tree.children[-2]
+                v = float(s)
+                if use_old_scheduling:
+                    v = v*steps if v<1 else v
+                else:
+                    if "." in s:
+                        v = (v - flt_offset) * steps
+                    else:
+                        v = (v - int_offset)
+                tree.children[-2] = min(steps, int(v))
+                if tree.children[-2] >= 1:
+                    res.append(tree.children[-2])
 
             def alternate(self, tree):
                 res.extend(range(1, steps+1))
@@ -134,7 +155,7 @@ class SdConditioning(list):
 
 
 
-def get_learned_conditioning(model, prompts: SdConditioning | list[str], steps):
+def get_learned_conditioning(model, prompts: SdConditioning | list[str], steps, hires_steps=None, use_old_scheduling=False):
     """converts a list of prompts into a list of prompt schedules - each schedule is a list of ScheduledPromptConditioning, specifying the comdition (cond),
     and the sampling step at which this condition is to be replaced by the next one.
 
@@ -154,7 +175,7 @@ def get_learned_conditioning(model, prompts: SdConditioning | list[str], steps):
     """
     res = []
 
-    prompt_schedules = get_learned_conditioning_prompt_schedules(prompts, steps)
+    prompt_schedules = get_learned_conditioning_prompt_schedules(prompts, steps, hires_steps, use_old_scheduling)
     cache = {}
 
     for prompt, prompt_schedule in zip(prompts, prompt_schedules):
@@ -229,7 +250,7 @@ class MulticondLearnedConditioning:
         self.batch: List[List[ComposableScheduledPromptConditioning]] = batch
 
 
-def get_multicond_learned_conditioning(model, prompts, steps) -> MulticondLearnedConditioning:
+def get_multicond_learned_conditioning(model, prompts, steps, hires_steps=None, use_old_scheduling=False) -> MulticondLearnedConditioning:
     """same as get_learned_conditioning, but returns a list of ScheduledPromptConditioning along with the weight objects for each prompt.
     For each prompt, the list is obtained by splitting the prompt using the AND separator.
 
@@ -238,7 +259,7 @@ def get_multicond_learned_conditioning(model, prompts, steps) -> MulticondLearne
 
     res_indexes, prompt_flat_list, prompt_indexes = get_multicond_prompt_list(prompts)
 
-    learned_conditioning = get_learned_conditioning(model, prompt_flat_list, steps)
+    learned_conditioning = get_learned_conditioning(model, prompt_flat_list, steps, hires_steps, use_old_scheduling)
 
     res = []
     for indexes in res_indexes:
diff --git a/modules/realesrgan_model.py b/modules/realesrgan_model.py
index 0700b853..02841c30 100644
--- a/modules/realesrgan_model.py
+++ b/modules/realesrgan_model.py
@@ -55,6 +55,7 @@ class UpscalerRealESRGAN(Upscaler):
             half=not cmd_opts.no_half and not cmd_opts.upcast_sampling,
             tile=opts.ESRGAN_tile,
             tile_pad=opts.ESRGAN_tile_overlap,
+            device=self.device,
         )
 
         upsampled = upsampler.enhance(np.array(img), outscale=info.scale)[0]
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 27d15e66..547e93c4 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -345,6 +345,7 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
 
     if shared.cmd_opts.no_half:
         model.float()
+        devices.dtype_unet = torch.float32
         timer.record("apply float()")
     else:
         vae = model.first_stage_model
@@ -362,9 +363,9 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
         if depth_model:
             model.depth_model = depth_model
 
+        devices.dtype_unet = torch.float16
         timer.record("apply half()")
 
-    devices.dtype_unet = torch.float16 if model.is_sdxl and not shared.cmd_opts.no_half else model.model.diffusion_model.dtype
     devices.unet_needs_upcast = shared.cmd_opts.upcast_sampling and devices.dtype == torch.float16 and devices.dtype_unet == torch.float16
 
     model.first_stage_model.to(devices.dtype_vae)
@@ -517,7 +518,7 @@ def get_empty_cond(sd_model):
 
 
 def send_model_to_cpu(m):
-    if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
+    if m.lowvram:
         lowvram.send_everything_to_cpu()
     else:
         m.to(devices.cpu)
@@ -525,17 +526,17 @@ def send_model_to_cpu(m):
     devices.torch_gc()
 
 
-def model_target_device():
-    if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
+def model_target_device(m):
+    if lowvram.is_needed(m):
         return devices.cpu
     else:
         return devices.device
 
 
 def send_model_to_device(m):
-    if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
-        lowvram.setup_for_low_vram(m, shared.cmd_opts.medvram)
-    else:
+    lowvram.apply(m)
+
+    if not m.lowvram:
         m.to(shared.device)
 
 
@@ -601,7 +602,7 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None):
             '': torch.float16,
         }
 
-    with sd_disable_initialization.LoadStateDictOnMeta(state_dict, device=model_target_device(), weight_dtype_conversion=weight_dtype_conversion):
+    with sd_disable_initialization.LoadStateDictOnMeta(state_dict, device=model_target_device(sd_model), weight_dtype_conversion=weight_dtype_conversion):
         load_model_weights(sd_model, checkpoint_info, state_dict, timer)
     timer.record("load weights from state dict")
 
@@ -743,7 +744,7 @@ def reload_model_weights(sd_model=None, info=None):
         script_callbacks.model_loaded_callback(sd_model)
         timer.record("script callbacks")
 
-        if not shared.cmd_opts.lowvram and not shared.cmd_opts.medvram:
+        if not sd_model.lowvram:
             sd_model.to(devices.device)
             timer.record("move model to device")
 
diff --git a/modules/sd_models_types.py b/modules/sd_models_types.py
new file mode 100644
index 00000000..5ffd2f4f
--- /dev/null
+++ b/modules/sd_models_types.py
@@ -0,0 +1,31 @@
+from ldm.models.diffusion.ddpm import LatentDiffusion
+from typing import TYPE_CHECKING
+
+
+if TYPE_CHECKING:
+    from modules.sd_models import CheckpointInfo
+
+
+class WebuiSdModel(LatentDiffusion):
+    """This class is not actually instantiated, but its fields are created and filled by webui"""
+
+    lowvram: bool
+    """True if lowvram/medvram optimizations are enabled -- see modules.lowvram for more info"""
+
+    sd_model_hash: str
+    """short hash, 10 first characters of SHA1 hash of the model file; may be None if --no-hashing flag is used"""
+
+    sd_model_checkpoint: str
+    """path to the file on disk that model weights were obtained from"""
+
+    sd_checkpoint_info: 'CheckpointInfo'
+    """structure with additional information about the file with model's weights"""
+
+    is_sdxl: bool
+    """True if the model's architecture is SDXL"""
+
+    is_sd2: bool
+    """True if the model's architecture is SD 2.x"""
+
+    is_sd1: bool
+    """True if the model's architecture is SD 1.x"""
diff --git a/modules/sd_unet.py b/modules/sd_unet.py
index 6d708ad2..5525cfbc 100644
--- a/modules/sd_unet.py
+++ b/modules/sd_unet.py
@@ -47,7 +47,7 @@ def apply_unet(option=None):
     if current_unet_option is None:
         current_unet = None
 
-        if not (shared.cmd_opts.lowvram or shared.cmd_opts.medvram):
+        if not shared.sd_model.lowvram:
             shared.sd_model.model.diffusion_model.to(devices.device)
 
         return
diff --git a/modules/sd_vae.py b/modules/sd_vae.py
index ee118656..669097da 100644
--- a/modules/sd_vae.py
+++ b/modules/sd_vae.py
@@ -263,7 +263,7 @@ def reload_vae_weights(sd_model=None, vae_file=unspecified):
     if loaded_vae_file == vae_file:
         return
 
-    if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
+    if sd_model.lowvram:
         lowvram.send_everything_to_cpu()
     else:
         sd_model.to(devices.cpu)
@@ -275,7 +275,7 @@ def reload_vae_weights(sd_model=None, vae_file=unspecified):
     sd_hijack.model_hijack.hijack(sd_model)
     script_callbacks.model_loaded_callback(sd_model)
 
-    if not shared.cmd_opts.lowvram and not shared.cmd_opts.medvram:
+    if not sd_model.lowvram:
         sd_model.to(devices.device)
 
     print("VAE weights loaded.")
diff --git a/modules/shared.py b/modules/shared.py
index 0c57b712..63661939 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -2,16 +2,15 @@ import sys
 
 import gradio as gr
 
-from modules import shared_cmd_options, shared_gradio_themes, options, shared_items
+from modules import shared_cmd_options, shared_gradio_themes, options, shared_items, sd_models_types
 from modules.paths_internal import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir  # noqa: F401
-from ldm.models.diffusion.ddpm import LatentDiffusion
 from modules import util
 
 cmd_opts = shared_cmd_options.cmd_opts
 parser = shared_cmd_options.parser
 
 batch_cond_uncond = True  # old field, unused now in favor of shared.opts.batch_cond_uncond
-parallel_processing_allowed = not cmd_opts.lowvram and not cmd_opts.medvram
+parallel_processing_allowed = True
 styles_filename = cmd_opts.styles_file
 config_filename = cmd_opts.ui_settings_file
 hide_dirs = {"visible": not cmd_opts.hide_ui_dir_config}
@@ -40,7 +39,7 @@ options_templates = None
 opts = None
 restricted_opts = None
 
-sd_model: LatentDiffusion = None
+sd_model: sd_models_types.WebuiSdModel = None
 
 settings_components = None
 """assinged from ui.py, a mapping on setting names to gradio components repsponsible for those settings"""
diff --git a/modules/shared_options.py b/modules/shared_options.py
index 88f6b334..83f56314 100644
--- a/modules/shared_options.py
+++ b/modules/shared_options.py
@@ -203,6 +203,7 @@ options_templates.update(options_section(('compatibility', "Compatibility"), {
     "use_old_hires_fix_width_height": OptionInfo(False, "For hires fix, use width/height sliders to set final resolution rather than first pass (disables Upscale by, Resize width/height to)."),
     "dont_fix_second_order_samplers_schedule": OptionInfo(False, "Do not fix prompt schedule for second order samplers."),
     "hires_fix_use_firstpass_conds": OptionInfo(False, "For hires fix, calculate conds of second pass using extra networks of first pass."),
+    "use_old_scheduling": OptionInfo(False, "Use old prompt editing timelines.", infotext="Old prompt editing timelines").info("For [red:green:N]; old: If N < 1, it's a fraction of steps (and hires fix uses range from 0 to 1), if N >= 1, it's an absolute number of steps; new: If N has a decimal point in it, it's a fraction of steps (and hires fix uses range from 1 to 2), othewrwise it's an absolute number of steps"),
 }))
 
 options_templates.update(options_section(('interrogate', "Interrogate"), {