From da464a3fb39ecc6ea7b22fe87271194480d8501c Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Wed, 12 Jul 2023 23:52:43 +0300
Subject: SDXL support

---
 modules/sd_hijack_clip.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'modules/sd_hijack_clip.py')

diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py
index 3b5a7666..6c17a81d 100644
--- a/modules/sd_hijack_clip.py
+++ b/modules/sd_hijack_clip.py
@@ -42,6 +42,10 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
         self.hijack: sd_hijack.StableDiffusionModelHijack = hijack
         self.chunk_length = 75
 
+        self.is_trainable = getattr(wrapped, 'is_trainable', False)
+        self.input_key = getattr(wrapped, 'input_key', 'txt')
+        self.legacy_ucg_val = None
+
     def empty_chunk(self):
         """creates an empty PromptChunk and returns it"""
 
@@ -199,8 +203,9 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
         """
         Accepts an array of texts; Passes texts through transformers network to create a tensor with numerical representation of those texts.
         Returns a tensor with shape of (B, T, C), where B is length of the array; T is length, in tokens, of texts (including padding) - T will
-        be a multiple of 77; and C is dimensionality of each token - for SD1 it's 768, and for SD2 it's 1024.
+        be a multiple of 77; and C is dimensionality of each token - for SD1 it's 768, for SD2 it's 1024, and for SDXL it's 1280.
         An example shape returned by this function can be: (2, 77, 768).
+        For SDXL, instead of returning one tensor avobe, it returns a tuple with two: the other one with shape (B, 1280) with pooled values.
         Webui usually sends just one text at a time through this function - the only time when texts is an array with more than one elemenet
         is when you do prompt editing: "a picture of a [cat:dog:0.4] eating ice cream"
         """
@@ -233,7 +238,10 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
             embeddings_list = ", ".join([f'{name} [{embedding.checksum()}]' for name, embedding in used_embeddings.items()])
             self.hijack.comments.append(f"Used embeddings: {embeddings_list}")
 
-        return torch.hstack(zs)
+        if getattr(self.wrapped, 'return_pooled', False):
+            return torch.hstack(zs), zs[0].pooled
+        else:
+            return torch.hstack(zs)
 
     def process_tokens(self, remade_batch_tokens, batch_multipliers):
         """
@@ -256,9 +264,9 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
         # restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise
         batch_multipliers = torch.asarray(batch_multipliers).to(devices.device)
         original_mean = z.mean()
-        z = z * batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape)
+        z *= batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape)
         new_mean = z.mean()
-        z = z * (original_mean / new_mean)
+        z *= (original_mean / new_mean)
 
         return z
 
-- 
cgit v1.2.1


From 594c8e7b263d9b37f4b18b56b159aeb6d1bba1b4 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Thu, 13 Jul 2023 11:35:52 +0300
Subject: fix CLIP doing the unneeded normalization revert SD2.1 back to use
 the original repo add SDXL's force_zero_embeddings to negative prompt

---
 modules/sd_hijack_clip.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'modules/sd_hijack_clip.py')

diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py
index 6c17a81d..b3771909 100644
--- a/modules/sd_hijack_clip.py
+++ b/modules/sd_hijack_clip.py
@@ -323,3 +323,18 @@ class FrozenCLIPEmbedderWithCustomWords(FrozenCLIPEmbedderWithCustomWordsBase):
         embedded = embedding_layer.token_embedding.wrapped(ids.to(embedding_layer.token_embedding.wrapped.weight.device)).squeeze(0)
 
         return embedded
+
+
+class FrozenCLIPEmbedderForSDXLWithCustomWords(FrozenCLIPEmbedderWithCustomWords):
+    def __init__(self, wrapped, hijack):
+        super().__init__(wrapped, hijack)
+
+    def encode_with_transformers(self, tokens):
+        outputs = self.wrapped.transformer(input_ids=tokens, output_hidden_states=self.wrapped.layer == "hidden")
+
+        if self.wrapped.layer == "last":
+            z = outputs.last_hidden_state
+        else:
+            z = outputs.hidden_states[self.wrapped.layer_idx]
+
+        return z
-- 
cgit v1.2.1


From 8284ebd94c4d4f7e7141ebb47af206faab66ffed Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Wed, 26 Jul 2023 13:03:52 +0300
Subject: fix autograd which i broke for no good reason when implementing SDXL

---
 modules/sd_hijack_clip.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'modules/sd_hijack_clip.py')

diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py
index 5443e609..990533fe 100644
--- a/modules/sd_hijack_clip.py
+++ b/modules/sd_hijack_clip.py
@@ -273,9 +273,9 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
         # restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise
         batch_multipliers = torch.asarray(batch_multipliers).to(devices.device)
         original_mean = z.mean()
-        z *= batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape)
+        z = z * batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape)
         new_mean = z.mean()
-        z *= (original_mean / new_mean)
+        z = z * (original_mean / new_mean)
 
         return z
 
-- 
cgit v1.2.1


From 89e6dfff717e2c2e57009fdabb0ed2b4f57b98a2 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Wed, 26 Jul 2023 15:07:56 +0300
Subject: repair SDXL

---
 modules/sd_hijack_clip.py | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'modules/sd_hijack_clip.py')

diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py
index 990533fe..16a5500e 100644
--- a/modules/sd_hijack_clip.py
+++ b/modules/sd_hijack_clip.py
@@ -270,6 +270,8 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
 
         z = self.encode_with_transformers(tokens)
 
+        pooled = getattr(z, 'pooled', None)
+
         # restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise
         batch_multipliers = torch.asarray(batch_multipliers).to(devices.device)
         original_mean = z.mean()
@@ -277,6 +279,9 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
         new_mean = z.mean()
         z = z * (original_mean / new_mean)
 
+        if pooled is not None:
+            z.pooled = pooled
+
         return z
 
 
-- 
cgit v1.2.1


From 6f0abbb71a3f29d6df63fed82d5d5e196ca0d4de Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 29 Jul 2023 15:15:06 +0300
Subject: textual inversion support for SDXL

---
 modules/sd_hijack_clip.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/sd_hijack_clip.py')

diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py
index 16a5500e..2f9d569b 100644
--- a/modules/sd_hijack_clip.py
+++ b/modules/sd_hijack_clip.py
@@ -161,7 +161,7 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
                     position += 1
                     continue
 
-                emb_len = int(embedding.vec.shape[0])
+                emb_len = int(embedding.vectors)
                 if len(chunk.tokens) + emb_len > self.chunk_length:
                     next_chunk()
 
-- 
cgit v1.2.1


From f56a309432996936a41040ea723d76a8cb488f87 Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Thu, 3 Aug 2023 10:25:49 +0900
Subject: fix missing TI hash

---
 modules/sd_hijack_clip.py | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'modules/sd_hijack_clip.py')

diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py
index 2f9d569b..8f29057a 100644
--- a/modules/sd_hijack_clip.py
+++ b/modules/sd_hijack_clip.py
@@ -245,6 +245,8 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
                 hashes.append(f"{name}: {shorthash}")
 
             if hashes:
+                if self.hijack.extra_generation_params.get("TI hashes"):
+                    hashes.append(self.hijack.extra_generation_params.get("TI hashes"))
                 self.hijack.extra_generation_params["TI hashes"] = ", ".join(hashes)
 
         if getattr(self.wrapped, 'return_pooled', False):
-- 
cgit v1.2.1