From 5841990b0df04906da7321beef6f7f7902b7d57b Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 05:38:38 +0100 Subject: Update textual_inversion.py --- modules/textual_inversion/textual_inversion.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index cd9f3498..f6316020 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -7,6 +7,9 @@ import tqdm import html import datetime +from PIL import Image, PngImagePlugin +import base64 +from io import BytesIO from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset @@ -80,7 +83,15 @@ class EmbeddingDatabase: def process_file(path, filename): name = os.path.splitext(filename)[0] - data = torch.load(path, map_location="cpu") + data = [] + + if filename.upper().endswith('.PNG'): + embed_image = Image.open(path) + if 'sd-embedding' in embed_image.text: + embeddingData = base64.b64decode(embed_image.text['sd-embedding']) + data = torch.load(BytesIO(embeddingData), map_location="cpu") + else: + data = torch.load(path, map_location="cpu") # textual inversion embeddings if 'string_to_param' in data: @@ -156,7 +167,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): return fn -def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, create_image_every, save_embedding_every, template_file): +def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." 
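The scheme this first patch introduces is simple enough to reproduce outside the webui: the trained embedding's raw .pt bytes are base64-encoded into a PNG tEXt chunk on save, and process_file() decodes them back through a BytesIO buffer on load. Below is a minimal round-trip sketch, assuming a hypothetical embedding.pt file on disk; note that the chunk key 'sd-embedding' is this revision's name, and a later patch in this series renames it to 'sd-ti-embedding'.

import base64
from io import BytesIO

import torch
from PIL import Image, PngImagePlugin

# write path: stash the raw .pt bytes, base64-encoded, in a tEXt chunk
info = PngImagePlugin.PngInfo()
with open('embedding.pt', 'rb') as f:  # hypothetical embedding file
    info.add_text('sd-embedding', base64.b64encode(f.read()).decode('ascii'))
Image.new('RGB', (64, 64)).save('preview.png', 'PNG', pnginfo=info)

# read path: the same recovery logic process_file() applies to .PNG files
img = Image.open('preview.png')
if 'sd-embedding' in img.text:
    data = torch.load(BytesIO(base64.b64decode(img.text['sd-embedding'])),
                      map_location='cpu')

The patch itself passes the b64encode() bytes straight to add_text(); decoding to str here is only for clarity.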
@@ -244,7 +255,15 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, image = processed.images[0] shared.state.current_image = image - image.save(last_saved_image) + + if save_image_with_stored_embedding: + info = PngImagePlugin.PngInfo() + info.add_text("sd-embedding", base64.b64encode(open(last_saved_file,'rb').read())) + image.save(last_saved_image, "PNG", pnginfo=info) + else: + image.save(last_saved_image) + + last_saved_image += f", prompt: {text}" -- cgit v1.2.1 From cd8673bd9b2e59bddefee8d307340d643695fe11 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 05:40:57 +0100 Subject: add embed embedding to ui --- modules/ui.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/ui.py b/modules/ui.py index b51af121..a5983204 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1001,7 +1001,8 @@ def create_ui(wrap_gradio_gpu_call): steps = gr.Number(label='Max steps', value=100000, precision=0) create_image_every = gr.Number(label='Save an image to log directory every N steps, 0 to disable', value=500, precision=0) save_embedding_every = gr.Number(label='Save a copy of embedding to log directory every N steps, 0 to disable', value=500, precision=0) - + save_image_with_stored_embedding = gr.Checkbox(label='Save images with embedding in PNG chunks', value=True) + with gr.Row(): with gr.Column(scale=2): gr.HTML(value="") @@ -1063,6 +1064,7 @@ def create_ui(wrap_gradio_gpu_call): create_image_every, save_embedding_every, template_file, + save_image_with_stored_embedding, ], outputs=[ ti_output, -- cgit v1.2.1 From fa0c5eb81b72bc1e562d0b9bbd92f30945d78b4e Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 20:41:22 +0100 Subject: Add pretty image captioning functions --- modules/images.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/modules/images.py b/modules/images.py index 29c5ee24..10963dc7 100644 --- a/modules/images.py +++ b/modules/images.py @@ -428,3 +428,34 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i file.write(info + "\n") return fullfn + +def addCaptionLines(lines,image,initialx,textfont): + draw = ImageDraw.Draw(image) + hstart =initialx + for fill,line in lines: + fontSize = 32 + font = ImageFont.truetype(textfont, fontSize) + _,_,w, h = draw.textbbox((0,0),line,font=font) + fontSize = min( int(fontSize * ((image.size[0]-35)/w) ), 28) + font = ImageFont.truetype(textfont, fontSize) + _,_,w,h = draw.textbbox((0,0),line,font=font) + draw.text(((image.size[0]-w)/2,hstart), line, font=font, fill=fill) + hstart += h + return hstart + +def captionImge(image,prelines,postlines,background=(51, 51, 51),font=None): + if font is None: + try: + font = ImageFont.truetype(opts.font or Roboto, fontsize) + font = opts.font or Roboto + except Exception: + font = Roboto + + sampleImage = image + background = Image.new("RGBA", (sampleImage.size[0],sampleImage.size[1]+1024), background) + hoffset = addCaptionLines(prelines,background,5,font)+16 + background.paste(sampleImage,(0,hoffset)) + hoffset = hoffset+sampleImage.size[1]+8 + hoffset = addCaptionLines(postlines,background,hoffset,font) + background = background.crop((0,0,sampleImage.size[0],hoffset+8)) + return background -- cgit v1.2.1 From 03694e1f9915e34cf7d9a31073f1a1a9def2909f Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 21:58:14 +0100 Subject: add 
embedding load and save from b64 json --- modules/textual_inversion/textual_inversion.py | 30 ++++++++++++++++++-------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index f6316020..1b7f8906 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -7,9 +7,11 @@ import tqdm import html import datetime -from PIL import Image, PngImagePlugin +from PIL import Image,PngImagePlugin +from ..images import captionImge +import numpy as np import base64 -from io import BytesIO +import json from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset @@ -87,9 +89,9 @@ class EmbeddingDatabase: if filename.upper().endswith('.PNG'): embed_image = Image.open(path) - if 'sd-embedding' in embed_image.text: - embeddingData = base64.b64decode(embed_image.text['sd-embedding']) - data = torch.load(BytesIO(embeddingData), map_location="cpu") + if 'sd-ti-embedding' in embed_image.text: + data = embeddingFromB64(embed_image.text['sd-ti-embedding']) + name = data.get('name',name) else: data = torch.load(path, map_location="cpu") @@ -258,13 +260,23 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, if save_image_with_stored_embedding: info = PngImagePlugin.PngInfo() - info.add_text("sd-embedding", base64.b64encode(open(last_saved_file,'rb').read())) - image.save(last_saved_image, "PNG", pnginfo=info) + data = torch.load(last_saved_file) + info.add_text("sd-ti-embedding", embeddingToB64(data)) + + pre_lines = [((255, 207, 175),"<{}>".format(data.get('name','???')))] + + caption_checkpoint_hash = data.get('sd_checkpoint','UNK') + caption_checkpoint_hash = caption_checkpoint_hash.upper() if caption_checkpoint_hash else 'UNK' + caption_stepcount = data.get('step',0) + caption_stepcount = caption_stepcount if caption_stepcount else 0 + + post_lines = [((240, 223, 175),"Trained against checkpoint [{}] for {} steps".format(caption_checkpoint_hash, + caption_stepcount))] + captioned_image = captionImge(image,prelines=pre_lines,postlines=post_lines) + captioned_image.save(last_saved_image, "PNG", pnginfo=info) else: image.save(last_saved_image) - - last_saved_image += f", prompt: {text}" shared.state.job_no = embedding.step -- cgit v1.2.1 From 969bd8256e5b4f1007d3cc653723d4ad50a92528 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 22:02:28 +0100 Subject: add alternate checkpoint hash source --- modules/textual_inversion/textual_inversion.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 1b7f8906..d7813084 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -265,8 +265,11 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, pre_lines = [((255, 207, 175),"<{}>".format(data.get('name','???')))] - caption_checkpoint_hash = data.get('sd_checkpoint','UNK') - caption_checkpoint_hash = caption_checkpoint_hash.upper() if caption_checkpoint_hash else 'UNK' + caption_checkpoint_hash = data.get('sd_checkpoint') + if caption_checkpoint_hash is None: + caption_checkpoint_hash = data.get('hash') + caption_checkpoint_hash = caption_checkpoint_hash.upper() if caption_checkpoint_hash else 'UNKNOWN' + caption_stepcount = 
data.get('step',0) caption_stepcount = caption_stepcount if caption_stepcount else 0 -- cgit v1.2.1 From 5d12ec82d3e13f5ff4c55db2930e4e10aed7015a Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 22:05:09 +0100 Subject: add encoder and decoder classes --- modules/textual_inversion/textual_inversion.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index d7813084..44d4e08b 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -16,6 +16,27 @@ import json from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset +class EmbeddingEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, torch.Tensor): + return {'EMBEDDINGTENSOR':obj.cpu().detach().numpy().tolist()} + return json.JSONEncoder.default(self, o) + +class EmbeddingDecoder(json.JSONDecoder): + def __init__(self, *args, **kwargs): + json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) + def object_hook(self, d): + if 'EMBEDDINGTENSOR' in d: + return torch.from_numpy(np.array(d['EMBEDDINGTENSOR'])) + return d + +def embeddingToB64(data): + d = json.dumps(data,cls=EmbeddingEncoder) + return base64.b64encode(d.encode()) + +def EmbeddingFromB64(data): + d = base64.b64decode(data) + return json.loads(d,cls=EmbeddingDecoder) class Embedding: def __init__(self, vec, name, step=None): -- cgit v1.2.1 From d0184b8f76ce492da699f1926f34b57cd095242e Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 22:06:12 +0100 Subject: change json tensor key name --- modules/textual_inversion/textual_inversion.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 44d4e08b..ae8d207d 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -19,15 +19,15 @@ import modules.textual_inversion.dataset class EmbeddingEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, torch.Tensor): - return {'EMBEDDINGTENSOR':obj.cpu().detach().numpy().tolist()} + return {'TORCHTENSOR':obj.cpu().detach().numpy().tolist()} return json.JSONEncoder.default(self, o) class EmbeddingDecoder(json.JSONDecoder): def __init__(self, *args, **kwargs): json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) def object_hook(self, d): - if 'EMBEDDINGTENSOR' in d: - return torch.from_numpy(np.array(d['EMBEDDINGTENSOR'])) + if 'TORCHTENSOR' in d: + return torch.from_numpy(np.array(d['TORCHTENSOR'])) return d def embeddingToB64(data): -- cgit v1.2.1 From 66846105103cfc282434d0dc2102910160b7a633 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 22:06:42 +0100 Subject: correct case on embeddingFromB64 --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index ae8d207d..d2b95fa3 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -34,7 +34,7 @@ def embeddingToB64(data): d = json.dumps(data,cls=EmbeddingEncoder) return 
base64.b64encode(d.encode()) -def EmbeddingFromB64(data): +def embeddingFromB64(data): d = base64.b64decode(data) return json.loads(d,cls=EmbeddingDecoder) -- cgit v1.2.1 From 96f1e6be59316ec640cab2435fa95b3688194906 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 22:14:50 +0100 Subject: source checkpoint hash from current checkpoint --- modules/textual_inversion/textual_inversion.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index d2b95fa3..b16fa84e 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -286,10 +286,8 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, pre_lines = [((255, 207, 175),"<{}>".format(data.get('name','???')))] - caption_checkpoint_hash = data.get('sd_checkpoint') - if caption_checkpoint_hash is None: - caption_checkpoint_hash = data.get('hash') - caption_checkpoint_hash = caption_checkpoint_hash.upper() if caption_checkpoint_hash else 'UNKNOWN' + checkpoint = sd_models.select_checkpoint() + caption_checkpoint_hash = checkpoint.hash caption_stepcount = data.get('step',0) caption_stepcount = caption_stepcount if caption_stepcount else 0 -- cgit v1.2.1 From 01fd9cf0d28d8b71a113ab1aa62accfe7f0d9c51 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 22:17:02 +0100 Subject: change source of step count --- modules/textual_inversion/textual_inversion.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index b16fa84e..e4f339b8 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -285,15 +285,9 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, info.add_text("sd-ti-embedding", embeddingToB64(data)) pre_lines = [((255, 207, 175),"<{}>".format(data.get('name','???')))] - checkpoint = sd_models.select_checkpoint() - caption_checkpoint_hash = checkpoint.hash - - caption_stepcount = data.get('step',0) - caption_stepcount = caption_stepcount if caption_stepcount else 0 - - post_lines = [((240, 223, 175),"Trained against checkpoint [{}] for {} steps".format(caption_checkpoint_hash, - caption_stepcount))] + post_lines = [((240, 223, 175),"Trained against checkpoint [{}] for {} steps".format(checkpoint.hash, + embedding.step))] captioned_image = captionImge(image,prelines=pre_lines,postlines=post_lines) captioned_image.save(last_saved_image, "PNG", pnginfo=info) else: -- cgit v1.2.1 From 0ac3a07eecbd7b98f3a19d01dc46f02dcda3443b Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Mon, 10 Oct 2022 00:05:36 +0100 Subject: add caption image with overlay --- modules/images.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/modules/images.py b/modules/images.py index 10963dc7..4a4fc977 100644 --- a/modules/images.py +++ b/modules/images.py @@ -459,3 +459,49 @@ def captionImge(image,prelines,postlines,background=(51, 51, 51),font=None): hoffset = addCaptionLines(postlines,background,hoffset,font) background = background.crop((0,0,sampleImage.size[0],hoffset+8)) return background + +def captionImageOverlay(srcimage,title,footerLeft,footerMid,footerRight,textfont=None): + 
from math import cos + + image = srcimage.copy() + + if textfont is None: + try: + textfont = ImageFont.truetype(opts.font or Roboto, fontsize) + textfont = opts.font or Roboto + except Exception: + textfont = Roboto + + factor = 1.5 + gradient = Image.new('RGBA', (1,image.size[1]), color=(0,0,0,0)) + for y in range(image.size[1]): + mag = 1-cos(y/image.size[1]*factor) + mag = max(mag,1-cos((image.size[1]-y)/image.size[1]*factor*1.1)) + gradient.putpixel((0, y), (0,0,0,int(mag*255))) + image = Image.alpha_composite(image.convert('RGBA'), gradient.resize(image.size)) + + draw = ImageDraw.Draw(image) + fontSize = 32 + font = ImageFont.truetype(textfont, fontSize) + padding = 10 + + _,_,w, h = draw.textbbox((0,0),title,font=font) + fontSize = min( int(fontSize * (((image.size[0]*0.75)-(padding*4))/w) ), 72) + font = ImageFont.truetype(textfont, fontSize) + _,_,w,h = draw.textbbox((0,0),title,font=font) + draw.text((padding,padding), title, anchor='lt', font=font, fill=(255,255,255,230)) + + _,_,w, h = draw.textbbox((0,0),footerLeft,font=font) + fontSizeleft = min( int(fontSize * (((image.size[0]/3)-(padding))/w) ), 72) + _,_,w, h = draw.textbbox((0,0),footerMid,font=font) + fontSizemid = min( int(fontSize * (((image.size[0]/3)-(padding))/w) ), 72) + _,_,w, h = draw.textbbox((0,0),footerRight,font=font) + fontSizeright = min( int(fontSize * (((image.size[0]/3)-(padding))/w) ), 72) + + font = ImageFont.truetype(textfont, min(fontSizeleft,fontSizemid,fontSizeright)) + + draw.text((padding,image.size[1]-padding), footerLeft, anchor='ls', font=font, fill=(255,255,255,230)) + draw.text((image.size[0]/2,image.size[1]-padding), footerMid, anchor='ms', font=font, fill=(255,255,255,230)) + draw.text((image.size[0]-padding,image.size[1]-padding), footerRight, anchor='rs', font=font, fill=(255,255,255,230)) + + return image -- cgit v1.2.1 From d6a599ef9ba18a66ae79b50f2945af5788fdda8f Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Mon, 10 Oct 2022 00:07:52 +0100 Subject: change caption method --- modules/textual_inversion/textual_inversion.py | 30 ++++++++++++++++++-------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index e4f339b8..21596e78 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -8,7 +8,7 @@ import html import datetime from PIL import Image,PngImagePlugin -from ..images import captionImge +from ..images import captionImageOverlay import numpy as np import base64 import json @@ -212,6 +212,12 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, else: images_dir = None + if create_image_every > 0 and save_image_with_stored_embedding: + images_embeds_dir = os.path.join(log_directory, "image_embeddings") + os.makedirs(images_embeds_dir, exist_ok=True) + else: + images_embeds_dir = None + cond_model = shared.sd_model.cond_stage_model shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." 
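The EmbeddingEncoder/EmbeddingDecoder pair from the earlier commits in this series defines the JSON wire format used by embeddingToB64()/embeddingFromB64(): a tensor serializes to {'TORCHTENSOR': nested_lists} and the decoder's object_hook rebuilds a torch.Tensor from it. A self-contained round-trip sketch follows; it already folds in the default()-argument fix (obj, not o) that a later patch in this series applies, and the restored tensor comes back as float64 rather than the original float32.

import base64
import json

import numpy as np
import torch

class EmbeddingEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, torch.Tensor):
            # tensors become plain nested lists under a marker key
            return {'TORCHTENSOR': obj.cpu().detach().numpy().tolist()}
        return json.JSONEncoder.default(self, obj)

class EmbeddingDecoder(json.JSONDecoder):
    def __init__(self, *args, **kwargs):
        json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs)

    def object_hook(self, d):
        if 'TORCHTENSOR' in d:  # marker key -> rebuild the tensor
            return torch.from_numpy(np.array(d['TORCHTENSOR']))
        return d

payload = {'name': 'my-embedding', 'string_to_param': {'*': torch.randn(2, 768)}}
b64 = base64.b64encode(json.dumps(payload, cls=EmbeddingEncoder).encode())
restored = json.loads(base64.b64decode(b64), cls=EmbeddingDecoder)
assert torch.allclose(restored['string_to_param']['*'].float(),
                      payload['string_to_param']['*'])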
@@ -279,19 +285,25 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, shared.state.current_image = image - if save_image_with_stored_embedding: + if save_image_with_stored_embedding and os.path.exists(last_saved_file): + + last_saved_image_chunks = os.path.join(images_embeds_dir, f'{embedding_name}-{embedding.step}.png') + info = PngImagePlugin.PngInfo() data = torch.load(last_saved_file) info.add_text("sd-ti-embedding", embeddingToB64(data)) - pre_lines = [((255, 207, 175),"<{}>".format(data.get('name','???')))] + title = "<{}>".format(data.get('name','???')) checkpoint = sd_models.select_checkpoint() - post_lines = [((240, 223, 175),"Trained against checkpoint [{}] for {} steps".format(checkpoint.hash, - embedding.step))] - captioned_image = captionImge(image,prelines=pre_lines,postlines=post_lines) - captioned_image.save(last_saved_image, "PNG", pnginfo=info) - else: - image.save(last_saved_image) + footer_left = checkpoint.model_name + footer_mid = '[{}]'.format(checkpoint.hash) + footer_right = '[{}]'.format(embedding.step) + + captioned_image = captionImageOverlay(image,title,footer_left,footer_mid,footer_right) + + captioned_image.save(last_saved_image_chunks, "PNG", pnginfo=info) + + image.save(last_saved_image) last_saved_image += f", prompt: {text}" -- cgit v1.2.1 From e2c2925eb4d634b186de2c76798162ec56e2f869 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Mon, 10 Oct 2022 00:12:53 +0100 Subject: remove braces from steps --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 21596e78..9a18ee5c 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -297,7 +297,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, checkpoint = sd_models.select_checkpoint() footer_left = checkpoint.model_name footer_mid = '[{}]'.format(checkpoint.hash) - footer_right = '[{}]'.format(embedding.step) + footer_right = '{}'.format(embedding.step) captioned_image = captionImageOverlay(image,title,footer_left,footer_mid,footer_right) -- cgit v1.2.1 From 6435691bb11c5a35703720bfd2a875f24c066f86 Mon Sep 17 00:00:00 2001 From: Justin Maier Date: Sun, 9 Oct 2022 19:26:52 -0600 Subject: Add "Scale to" option to Extras --- javascript/ui.js | 3 ++- modules/extras.py | 28 +++++++++++++++++++++++----- modules/ui.py | 38 +++++++++++++++++++++++++------------- 3 files changed, 50 insertions(+), 19 deletions(-) diff --git a/javascript/ui.js b/javascript/ui.js index b1053201..4100944e 100644 --- a/javascript/ui.js +++ b/javascript/ui.js @@ -101,7 +101,8 @@ function create_tab_index_args(tabId, args){ } function get_extras_tab_index(){ - return create_tab_index_args('mode_extras', arguments) + const [,,...args] = [...arguments] + return [get_tab_index('mode_extras'), get_tab_index('extras_resize_mode'), ...args] } function create_submit_args(args){ diff --git a/modules/extras.py b/modules/extras.py index 41e8612c..83ca7049 100644 --- a/modules/extras.py +++ b/modules/extras.py @@ -1,3 +1,4 @@ +import math import os import numpy as np @@ -19,7 +20,7 @@ import gradio as gr cached_images = {} -def run_extras(extras_mode, image, image_folder, gfpgan_visibility, codeformer_visibility, codeformer_weight, upscaling_resize, extras_upscaler_1, extras_upscaler_2, extras_upscaler_2_visibility): +def 
run_extras(extras_mode, resize_mode, image, image_folder, gfpgan_visibility, codeformer_visibility, codeformer_weight, upscaling_resize, upscaling_resize_w, upscaling_resize_h, upscaling_crop, extras_upscaler_1, extras_upscaler_2, extras_upscaler_2_visibility): devices.torch_gc() imageArr = [] @@ -67,8 +68,23 @@ def run_extras(extras_mode, image, image_folder, gfpgan_visibility, codeformer_v info += f"CodeFormer w: {round(codeformer_weight, 2)}, CodeFormer visibility:{round(codeformer_visibility, 2)}\n" image = res + if resize_mode == 1: + upscaling_resize = max(upscaling_resize_w/image.width, upscaling_resize_h/image.height) + crop_info = " (crop)" if upscaling_crop else "" + info += f"Resize to: {upscaling_resize_w:g}x{upscaling_resize_h:g}{crop_info}\n" + + def crop_upscaled_center(image, resize_w, resize_h): + left = int(math.ceil((image.width - resize_w) / 2)) + right = image.width - int(math.floor((image.width - resize_w) / 2)) + top = int(math.ceil((image.height - resize_h) / 2)) + bottom = image.height - int(math.floor((image.height - resize_h) / 2)) + + image = image.crop((left, top, right, bottom)) + return image + + if upscaling_resize != 1.0: - def upscale(image, scaler_index, resize): + def upscale(image, scaler_index, resize, mode, resize_w, resize_h, crop): small = image.crop((image.width // 2, image.height // 2, image.width // 2 + 10, image.height // 2 + 10)) pixels = tuple(np.array(small).flatten().tolist()) key = (resize, scaler_index, image.width, image.height, gfpgan_visibility, codeformer_visibility, codeformer_weight) + pixels @@ -77,15 +93,17 @@ def run_extras(extras_mode, image, image_folder, gfpgan_visibility, codeformer_v if c is None: upscaler = shared.sd_upscalers[scaler_index] c = upscaler.scaler.upscale(image, resize, upscaler.data_path) + if mode == 1 and crop: + c = crop_upscaled_center(c, resize_w, resize_h) cached_images[key] = c return c info += f"Upscale: {round(upscaling_resize, 3)}, model:{shared.sd_upscalers[extras_upscaler_1].name}\n" - res = upscale(image, extras_upscaler_1, upscaling_resize) + res = upscale(image, extras_upscaler_1, upscaling_resize, resize_mode, upscaling_resize_w, upscaling_resize_h, upscaling_crop) if extras_upscaler_2 != 0 and extras_upscaler_2_visibility > 0: - res2 = upscale(image, extras_upscaler_2, upscaling_resize) + res2 = upscale(image, extras_upscaler_2, upscaling_resize, resize_mode, upscaling_resize_w, upscaling_resize_h, upscaling_crop) info += f"Upscale: {round(upscaling_resize, 3)}, visibility: {round(extras_upscaler_2_visibility, 3)}, model:{shared.sd_upscalers[extras_upscaler_2].name}\n" res = Image.blend(res, res2, extras_upscaler_2_visibility) @@ -190,7 +208,7 @@ def run_modelmerger(primary_model_name, secondary_model_name, interp_method, int theta_0[key] = theta_func(theta_0[key], theta_1[key], (float(1.0) - interp_amount)) # Need to reverse the interp_amount to match the desired mix ration in the merged checkpoint if save_as_half: theta_0[key] = theta_0[key].half() - + for key in theta_1.keys(): if 'model' in key and key not in theta_0: theta_0[key] = theta_1[key] diff --git a/modules/ui.py b/modules/ui.py index 2231a8ed..4bb2892b 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -101,7 +101,7 @@ def send_gradio_gallery_to_image(x): def save_files(js_data, images, do_make_zip, index): - import csv + import csv filenames = [] fullfns = [] @@ -551,7 +551,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Row(): do_make_zip = gr.Checkbox(label="Make Zip when Save?", value=False) - + with gr.Row(): download_files 
= gr.File(None, file_count="multiple", interactive=False, show_label=False, visible=False) @@ -739,7 +739,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Row(): do_make_zip = gr.Checkbox(label="Make Zip when Save?", value=False) - + with gr.Row(): download_files = gr.File(None, file_count="multiple", interactive=False, show_label=False, visible=False) @@ -903,7 +903,15 @@ def create_ui(wrap_gradio_gpu_call): with gr.TabItem('Batch Process'): image_batch = gr.File(label="Batch Process", file_count="multiple", interactive=True, type="file") - upscaling_resize = gr.Slider(minimum=1.0, maximum=4.0, step=0.05, label="Resize", value=2) + with gr.Tabs(elem_id="extras_resize_mode"): + with gr.TabItem('Scale by'): + upscaling_resize = gr.Slider(minimum=1.0, maximum=4.0, step=0.05, label="Resize", value=2) + with gr.TabItem('Scale to'): + with gr.Group(): + with gr.Row(): + upscaling_resize_w = gr.Number(label="Width", value=512) + upscaling_resize_h = gr.Number(label="Height", value=512) + upscaling_crop = gr.Checkbox(label='Crop to fit', value=True) with gr.Group(): extras_upscaler_1 = gr.Radio(label='Upscaler 1', choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index") @@ -934,6 +942,7 @@ def create_ui(wrap_gradio_gpu_call): fn=wrap_gradio_gpu_call(modules.extras.run_extras), _js="get_extras_tab_index", inputs=[ + dummy_component, dummy_component, extras_image, image_batch, @@ -941,6 +950,9 @@ def create_ui(wrap_gradio_gpu_call): codeformer_visibility, codeformer_weight, upscaling_resize, + upscaling_resize_w, + upscaling_resize_h, + upscaling_crop, extras_upscaler_1, extras_upscaler_2, extras_upscaler_2_visibility, @@ -951,14 +963,14 @@ def create_ui(wrap_gradio_gpu_call): html_info, ] ) - + extras_send_to_img2img.click( fn=lambda x: image_from_url_text(x), _js="extract_image_from_gallery_img2img", inputs=[result_images], outputs=[init_img], ) - + extras_send_to_inpaint.click( fn=lambda x: image_from_url_text(x), _js="extract_image_from_gallery_img2img", @@ -1286,7 +1298,7 @@ Requested path was: {f} outputs=[], _js='function(){restart_reload()}' ) - + if column is not None: column.__exit__() @@ -1318,12 +1330,12 @@ Requested path was: {f} component_dict[k] = component settings_interface.gradio_ref = demo - + with gr.Tabs() as tabs: for interface, label, ifid in interfaces: with gr.TabItem(label, id=ifid): interface.render() - + if os.path.exists(os.path.join(script_path, "notification.mp3")): audio_notification = gr.Audio(interactive=False, value=os.path.join(script_path, "notification.mp3"), elem_id="audio_notification", visible=False) @@ -1456,10 +1468,10 @@ Requested path was: {f} if getattr(obj,'custom_script_source',None) is not None: key = 'customscript/' + obj.custom_script_source + '/' + key - + if getattr(obj, 'do_not_save_to_config', False): return - + saved_value = ui_settings.get(key, None) if saved_value is None: ui_settings[key] = getattr(obj, field) @@ -1483,10 +1495,10 @@ Requested path was: {f} if type(x) == gr.Textbox: apply_field(x, 'value') - + if type(x) == gr.Number: apply_field(x, 'value') - + visit(txt2img_interface, loadsave, "txt2img") visit(img2img_interface, loadsave, "img2img") visit(extras_interface, loadsave, "extras") -- cgit v1.2.1 From 1f92336be768d235c18a82acb2195b7135101ae7 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Sun, 9 Oct 2022 23:58:18 -0500 Subject: refactored the deepbooru module to improve speed on running multiple interogations in a row. 
Added the option to generate deepbooru tags for textual inversion preproccessing. --- modules/deepbooru.py | 84 +++++++++++++++++++++++++-------- modules/textual_inversion/preprocess.py | 22 ++++++++- modules/ui.py | 52 ++++++++++++++------ 3 files changed, 122 insertions(+), 36 deletions(-) diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 7e3c0618..cee4a3b4 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -1,21 +1,74 @@ import os.path from concurrent.futures import ProcessPoolExecutor -from multiprocessing import get_context +import multiprocessing -def _load_tf_and_return_tags(pil_image, threshold): +def get_deepbooru_tags(pil_image, threshold=0.5): + """ + This method is for running only one image at a time for simple use. Used to the img2img interrogate. + """ + from modules import shared # prevents circular reference + create_deepbooru_process(threshold) + shared.deepbooru_process_return["value"] = -1 + shared.deepbooru_process_queue.put(pil_image) + while shared.deepbooru_process_return["value"] == -1: + time.sleep(0.2) + release_process() + return ret + + +def deepbooru_process(queue, deepbooru_process_return, threshold): + model, tags = get_deepbooru_tags_model() + while True: # while process is running, keep monitoring queue for new image + pil_image = queue.get() + if pil_image == "QUIT": + break + else: + deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold) + + +def create_deepbooru_process(threshold=0.5): + """ + Creates deepbooru process. A queue is created to send images into the process. This enables multiple images + to be processed in a row without reloading the model or creating a new process. To return the data, a shared + dictionary is created to hold the tags created. To wait for tags to be returned, a value of -1 is assigned + to the dictionary and the method adding the image to the queue should wait for this value to be updated with + the tags. 
+ """ + from modules import shared # prevents circular reference + shared.deepbooru_process_manager = multiprocessing.Manager() + shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue() + shared.deepbooru_process_return = shared.deepbooru_process_manager.dict() + shared.deepbooru_process_return["value"] = -1 + shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold)) + shared.deepbooru_process.start() + + +def release_process(): + """ + Stops the deepbooru process to return used memory + """ + from modules import shared # prevents circular reference + shared.deepbooru_process_queue.put("QUIT") + shared.deepbooru_process.join() + shared.deepbooru_process_queue = None + shared.deepbooru_process = None + shared.deepbooru_process_return = None + shared.deepbooru_process_manager = None + +def get_deepbooru_tags_model(): import deepdanbooru as dd import tensorflow as tf import numpy as np - this_folder = os.path.dirname(__file__) model_path = os.path.abspath(os.path.join(this_folder, '..', 'models', 'deepbooru')) if not os.path.exists(os.path.join(model_path, 'project.json')): # there is no point importing these every time import zipfile from basicsr.utils.download_util import load_file_from_url - load_file_from_url(r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip", - model_path) + load_file_from_url( + r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip", + model_path) with zipfile.ZipFile(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"), "r") as zip_ref: zip_ref.extractall(model_path) os.remove(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip")) @@ -24,7 +77,13 @@ def _load_tf_and_return_tags(pil_image, threshold): model = dd.project.load_model_from_project( model_path, compile_model=True ) + return model, tags + +def get_deepbooru_tags_from_model(model, tags, pil_image, threshold=0.5): + import deepdanbooru as dd + import tensorflow as tf + import numpy as np width = model.input_shape[2] height = model.input_shape[1] image = np.array(pil_image) @@ -57,17 +116,4 @@ def _load_tf_and_return_tags(pil_image, threshold): print('\n'.join(sorted(result_tags_print, reverse=True))) - return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ') - - -def subprocess_init_no_cuda(): - import os - os.environ["CUDA_VISIBLE_DEVICES"] = "-1" - - -def get_deepbooru_tags(pil_image, threshold=0.5): - context = get_context('spawn') - with ProcessPoolExecutor(initializer=subprocess_init_no_cuda, mp_context=context) as executor: - f = executor.submit(_load_tf_and_return_tags, pil_image, threshold, ) - ret = f.result() # will rethrow any exceptions - return ret \ No newline at end of file + return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ') \ No newline at end of file diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index f1c002a2..9f63c9a4 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -3,11 +3,14 @@ from PIL import Image, ImageOps import platform import sys import tqdm +import time from modules import shared, images +from modules.shared import opts, cmd_opts +if cmd_opts.deepdanbooru: + import modules.deepbooru as deepbooru - -def preprocess(process_src, process_dst, process_flip, process_split, 
process_caption): +def preprocess(process_src, process_dst, process_flip, process_split, process_caption, process_caption_deepbooru=False): size = 512 src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) @@ -24,10 +27,21 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca if process_caption: shared.interrogator.load() + if process_caption_deepbooru: + deepbooru.create_deepbooru_process() + def save_pic_with_caption(image, index): if process_caption: caption = "-" + shared.interrogator.generate_caption(image) caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png") + elif process_caption_deepbooru: + shared.deepbooru_process_return["value"] = -1 + shared.deepbooru_process_queue.put(image) + while shared.deepbooru_process_return["value"] == -1: + time.sleep(0.2) + caption = "-" + shared.deepbooru_process_return["value"] + caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png") + shared.deepbooru_process_return["value"] = -1 else: caption = filename caption = os.path.splitext(caption)[0] @@ -79,6 +93,10 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca if process_caption: shared.interrogator.send_blip_to_ram() + if process_caption_deepbooru: + deepbooru.release_process() + + def sanitize_caption(base_path, original_caption, suffix): operating_system = platform.system().lower() if (operating_system == "windows"): diff --git a/modules/ui.py b/modules/ui.py index 2231a8ed..179e3a83 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1034,6 +1034,9 @@ def create_ui(wrap_gradio_gpu_call): process_flip = gr.Checkbox(label='Create flipped copies') process_split = gr.Checkbox(label='Split oversized images into two') process_caption = gr.Checkbox(label='Use BLIP caption as filename') + if cmd_opts.deepdanbooru: + process_caption_deepbooru = gr.Checkbox(label='Use deepbooru caption as filename') + with gr.Row(): with gr.Column(scale=3): @@ -1086,21 +1089,40 @@ def create_ui(wrap_gradio_gpu_call): ] ) - run_preprocess.click( - fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), - _js="start_training_textual_inversion", - inputs=[ - process_src, - process_dst, - process_flip, - process_split, - process_caption, - ], - outputs=[ - ti_output, - ti_outcome, - ], - ) + if cmd_opts.deepdanbooru: + # if process_caption_deepbooru is None, it will cause an error, as a result only include it if it is enabled + run_preprocess.click( + fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), + _js="start_training_textual_inversion", + inputs=[ + process_src, + process_dst, + process_flip, + process_split, + process_caption, + process_caption_deepbooru, + ], + outputs=[ + ti_output, + ti_outcome, + ], + ) + else: + run_preprocess.click( + fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), + _js="start_training_textual_inversion", + inputs=[ + process_src, + process_dst, + process_flip, + process_split, + process_caption, + ], + outputs=[ + ti_output, + ti_outcome, + ], + ) train_embedding.click( fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.train_embedding, extra_outputs=[gr.update()]), -- cgit v1.2.1 From 8ec069e64df48f8f202f8b93a08e91b69448eb39 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Mon, 10 Oct 2022 03:23:24 -0500 Subject: removed duplicate run_preprocess.click by creating run_preprocess_inputs list and appending deepbooru 
variable to input list if in scope --- modules/ui.py | 49 +++++++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/modules/ui.py b/modules/ui.py index 179e3a83..22ca74c2 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1089,40 +1089,25 @@ def create_ui(wrap_gradio_gpu_call): ] ) + run_preprocess_inputs = [ + process_src, + process_dst, + process_flip, + process_split, + process_caption, + ] if cmd_opts.deepdanbooru: # if process_caption_deepbooru is None, it will cause an error, as a result only include it if it is enabled - run_preprocess.click( - fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), - _js="start_training_textual_inversion", - inputs=[ - process_src, - process_dst, - process_flip, - process_split, - process_caption, - process_caption_deepbooru, - ], - outputs=[ - ti_output, - ti_outcome, - ], - ) - else: - run_preprocess.click( - fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), - _js="start_training_textual_inversion", - inputs=[ - process_src, - process_dst, - process_flip, - process_split, - process_caption, - ], - outputs=[ - ti_output, - ti_outcome, - ], - ) + run_preprocess_inputs.append(process_caption_deepbooru) + run_preprocess.click( + fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), + _js="start_training_textual_inversion", + inputs=run_preprocess_inputs, + outputs=[ + ti_output, + ti_outcome, + ], + ) train_embedding.click( fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.train_embedding, extra_outputs=[gr.update()]), -- cgit v1.2.1 From 2f94331df2cb1181439adecc28cfd758049f6501 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Mon, 10 Oct 2022 03:34:00 -0500 Subject: removed change in last commit, simplified to adding the visible argument to process_caption_deepbooru and it set to False if deepdanbooru argument is not set --- modules/ui.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/modules/ui.py b/modules/ui.py index 22ca74c2..f8adafb3 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1036,7 +1036,8 @@ def create_ui(wrap_gradio_gpu_call): process_caption = gr.Checkbox(label='Use BLIP caption as filename') if cmd_opts.deepdanbooru: process_caption_deepbooru = gr.Checkbox(label='Use deepbooru caption as filename') - + else: + process_caption_deepbooru = gr.Checkbox(label='Use deepbooru caption as filename', visible=False) with gr.Row(): with gr.Column(scale=3): @@ -1089,20 +1090,17 @@ def create_ui(wrap_gradio_gpu_call): ] ) - run_preprocess_inputs = [ - process_src, - process_dst, - process_flip, - process_split, - process_caption, - ] - if cmd_opts.deepdanbooru: - # if process_caption_deepbooru is None, it will cause an error, as a result only include it if it is enabled - run_preprocess_inputs.append(process_caption_deepbooru) run_preprocess.click( fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), _js="start_training_textual_inversion", - inputs=run_preprocess_inputs, + inputs=[ + process_src, + process_dst, + process_flip, + process_split, + process_caption, + process_caption_deepbooru + ], outputs=[ ti_output, ti_outcome, -- cgit v1.2.1 From 707a431100362645e914042bb344d08439f48ac8 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Mon, 10 Oct 2022 15:34:49 +0100 Subject: add pixel data footer --- modules/textual_inversion/textual_inversion.py 
| 48 ++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 7a24192e..6fb64691 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -12,6 +12,7 @@ from ..images import captionImageOverlay import numpy as np import base64 import json +import zlib from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset @@ -20,7 +21,7 @@ class EmbeddingEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, torch.Tensor): return {'TORCHTENSOR':obj.cpu().detach().numpy().tolist()} - return json.JSONEncoder.default(self, o) + return json.JSONEncoder.default(self, obj) class EmbeddingDecoder(json.JSONDecoder): def __init__(self, *args, **kwargs): @@ -38,6 +39,45 @@ def embeddingFromB64(data): d = base64.b64decode(data) return json.loads(d,cls=EmbeddingDecoder) +def appendImageDataFooter(image,data): + d = 3 + data_compressed = zlib.compress( json.dumps(data,cls=EmbeddingEncoder).encode(),level=9) + dnp = np.frombuffer(data_compressed,np.uint8).copy() + w = image.size[0] + next_size = dnp.shape[0] + (w-(dnp.shape[0]%w)) + next_size = next_size + ((w*d)-(next_size%(w*d))) + dnp.resize(next_size) + dnp = dnp.reshape((-1,w,d)) + print(dnp.shape) + im = Image.fromarray(dnp,mode='RGB') + background = Image.new('RGB',(image.size[0],image.size[1]+im.size[1]+1),(0,0,0)) + background.paste(image,(0,0)) + background.paste(im,(0,image.size[1]+1)) + return background + +def crop_black(img,tol=0): + mask = (img>tol).all(2) + mask0,mask1 = mask.any(0),mask.any(1) + col_start,col_end = mask0.argmax(),mask.shape[1]-mask0[::-1].argmax() + row_start,row_end = mask1.argmax(),mask.shape[0]-mask1[::-1].argmax() + return img[row_start:row_end,col_start:col_end] + +def extractImageDataFooter(image): + d=3 + outarr = crop_black(np.array(image.getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) + lastRow = np.where( np.sum(outarr, axis=(1,2))==0) + if lastRow[0].shape[0] == 0: + print('Image data block not found.') + return None + lastRow = lastRow[0] + + lastRow = lastRow.max() + + dataBlock = outarr[lastRow+1::].astype(np.uint8).flatten().tobytes() + print(lastRow) + data = zlib.decompress(dataBlock) + return json.loads(data,cls=EmbeddingDecoder) + class Embedding: def __init__(self, vec, name, step=None): self.vec = vec @@ -113,6 +153,9 @@ class EmbeddingDatabase: if 'sd-ti-embedding' in embed_image.text: data = embeddingFromB64(embed_image.text['sd-ti-embedding']) name = data.get('name',name) + else: + data = extractImageDataFooter(embed_image) + name = data.get('name',name) else: data = torch.load(path, map_location="cpu") @@ -190,7 +233,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): return fn -def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, num_repeats, create_image_every, save_embedding_every, template_file): +def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, num_repeats, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." 
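To make the crop_black() helper above concrete: (img > tol).all(2) marks pixels whose every channel exceeds tol, mask.any(0) and mask.any(1) find the columns and rows holding such content, and argmax on the reversed mask locates the last content column and row. A tiny synthetic check, NumPy only:

import numpy as np

def crop_black(img, tol=0):
    mask = (img > tol).all(2)                # True where every channel > tol
    mask0, mask1 = mask.any(0), mask.any(1)  # columns / rows containing content
    col_start, col_end = mask0.argmax(), mask.shape[1] - mask0[::-1].argmax()
    row_start, row_end = mask1.argmax(), mask.shape[0] - mask1[::-1].argmax()
    return img[row_start:row_end, col_start:col_end]

img = np.zeros((6, 6, 3), np.uint8)  # all-black 6x6 RGB canvas
img[2:4, 1:5] = 255                  # bright 2x4 block in the middle
assert crop_black(img).shape == (2, 4, 3)

This trimming is what lets extractImageDataFooter() isolate the appended data rows from whatever black padding surrounds the image.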
@@ -308,6 +351,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini footer_right = '{}'.format(embedding.step) captioned_image = captionImageOverlay(image,title,footer_left,footer_mid,footer_right) + captioned_image = appendImageDataFooter(captioned_image,data) captioned_image.save(last_saved_image_chunks, "PNG", pnginfo=info) -- cgit v1.2.1 From df6d0d9286279c41c4c67460c3158fa268697524 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Mon, 10 Oct 2022 15:43:09 +0100 Subject: convert back to rgb as some hosts add alpha --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 6fb64691..667a7cf2 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -64,7 +64,7 @@ def crop_black(img,tol=0): def extractImageDataFooter(image): d=3 - outarr = crop_black(np.array(image.getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) + outarr = crop_black(np.array(image.convert('RGB').getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) lastRow = np.where( np.sum(outarr, axis=(1,2))==0) if lastRow[0].shape[0] == 0: print('Image data block not found.') -- cgit v1.2.1 From 1d64976dbc5a0f3124567b91fadd5014a9d93c5f Mon Sep 17 00:00:00 2001 From: Justin Maier Date: Mon, 10 Oct 2022 12:04:21 -0600 Subject: Simplify crop logic --- modules/extras.py | 14 +++----------- modules/ui.py | 4 ++-- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/modules/extras.py b/modules/extras.py index 83ca7049..b24d7de3 100644 --- a/modules/extras.py +++ b/modules/extras.py @@ -73,16 +73,6 @@ def run_extras(extras_mode, resize_mode, image, image_folder, gfpgan_visibility, crop_info = " (crop)" if upscaling_crop else "" info += f"Resize to: {upscaling_resize_w:g}x{upscaling_resize_h:g}{crop_info}\n" - def crop_upscaled_center(image, resize_w, resize_h): - left = int(math.ceil((image.width - resize_w) / 2)) - right = image.width - int(math.floor((image.width - resize_w) / 2)) - top = int(math.ceil((image.height - resize_h) / 2)) - bottom = image.height - int(math.floor((image.height - resize_h) / 2)) - - image = image.crop((left, top, right, bottom)) - return image - - if upscaling_resize != 1.0: def upscale(image, scaler_index, resize, mode, resize_w, resize_h, crop): small = image.crop((image.width // 2, image.height // 2, image.width // 2 + 10, image.height // 2 + 10)) @@ -94,7 +84,9 @@ def run_extras(extras_mode, resize_mode, image, image_folder, gfpgan_visibility, upscaler = shared.sd_upscalers[scaler_index] c = upscaler.scaler.upscale(image, resize, upscaler.data_path) if mode == 1 and crop: - c = crop_upscaled_center(c, resize_w, resize_h) + cropped = Image.new("RGB", (resize_w, resize_h)) + cropped.paste(c, box=(resize_w // 2 - c.width // 2, resize_h // 2 - c.height // 2)) + c = cropped cached_images[key] = c return c diff --git a/modules/ui.py b/modules/ui.py index 4bb2892b..1aabe18d 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -909,8 +909,8 @@ def create_ui(wrap_gradio_gpu_call): with gr.TabItem('Scale to'): with gr.Group(): with gr.Row(): - upscaling_resize_w = gr.Number(label="Width", value=512) - upscaling_resize_h = gr.Number(label="Height", value=512) + upscaling_resize_w = gr.Number(label="Width", value=512, precision=0) + upscaling_resize_h = gr.Number(label="Height", value=512, 
precision=0) upscaling_crop = gr.Checkbox(label='Crop to fit', value=True) with gr.Group(): -- cgit v1.2.1 From a1a05ad2d13d0b995dbf8ecead6315f17837ef81 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Mon, 10 Oct 2022 16:47:58 -0500 Subject: import time missing, added to deepbooru fixxing error on get_deepbooru_tags --- modules/deepbooru.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/deepbooru.py b/modules/deepbooru.py index cee4a3b4..12555b2e 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -1,6 +1,7 @@ import os.path from concurrent.futures import ProcessPoolExecutor import multiprocessing +import time def get_deepbooru_tags(pil_image, threshold=0.5): -- cgit v1.2.1 From b980e7188c671fc55b26557f097076fb5c976ba0 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Mon, 10 Oct 2022 16:52:54 -0500 Subject: corrected tag return in get_deepbooru_tags --- modules/deepbooru.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 12555b2e..ebdba5e0 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -15,7 +15,6 @@ def get_deepbooru_tags(pil_image, threshold=0.5): while shared.deepbooru_process_return["value"] == -1: time.sleep(0.2) release_process() - return ret def deepbooru_process(queue, deepbooru_process_return, threshold): -- cgit v1.2.1 From 315d5a8ed975c88f670bc484f40a23fbf3a77b63 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Mon, 10 Oct 2022 23:14:44 +0100 Subject: update data dis[play style --- modules/textual_inversion/textual_inversion.py | 88 +++++++++++++++++++------- 1 file changed, 65 insertions(+), 23 deletions(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 667a7cf2..95eebea7 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -39,20 +39,59 @@ def embeddingFromB64(data): d = base64.b64decode(data) return json.loads(d,cls=EmbeddingDecoder) -def appendImageDataFooter(image,data): +def xorBlock(block): + return np.bitwise_xor(block.astype(np.uint8), + ((np.random.RandomState(0xDEADBEEF).random(block.shape)*255).astype(np.uint8)) & 0x0F ) + +def styleBlock(block,sequence): + im = Image.new('RGB',(block.shape[1],block.shape[0])) + draw = ImageDraw.Draw(im) + i=0 + for x in range(-6,im.size[0],8): + for yi,y in enumerate(range(-6,im.size[1],8)): + offset=0 + if yi%2==0: + offset=4 + shade = sequence[i%len(sequence)] + i+=1 + draw.ellipse((x+offset, y, x+6+offset, y+6), fill =(shade,shade,shade) ) + + fg = np.array(im).astype(np.uint8) & 0xF0 + return block ^ fg + +def insertImageDataEmbed(image,data): d = 3 data_compressed = zlib.compress( json.dumps(data,cls=EmbeddingEncoder).encode(),level=9) dnp = np.frombuffer(data_compressed,np.uint8).copy() - w = image.size[0] - next_size = dnp.shape[0] + (w-(dnp.shape[0]%w)) - next_size = next_size + ((w*d)-(next_size%(w*d))) - dnp.resize(next_size) - dnp = dnp.reshape((-1,w,d)) - print(dnp.shape) - im = Image.fromarray(dnp,mode='RGB') - background = Image.new('RGB',(image.size[0],image.size[1]+im.size[1]+1),(0,0,0)) - background.paste(image,(0,0)) - background.paste(im,(0,image.size[1]+1)) + dnphigh = dnp >> 4 + dnplow = dnp & 0x0F + + h = image.size[1] + next_size = dnplow.shape[0] + (h-(dnplow.shape[0]%h)) + next_size = next_size + ((h*d)-(next_size%(h*d))) + + dnplow.resize(next_size) + dnplow = dnplow.reshape((h,-1,d)) + + dnphigh.resize(next_size) + dnphigh = dnphigh.reshape((h,-1,d)) + + 
edgeStyleWeights = list(data['string_to_param'].values())[0].cpu().detach().numpy().tolist()[0][:1024] + edgeStyleWeights = (np.abs(edgeStyleWeights)/np.max(np.abs(edgeStyleWeights))*255).astype(np.uint8) + + dnplow = styleBlock(dnplow,sequence=edgeStyleWeights) + dnplow = xorBlock(dnplow) + dnphigh = styleBlock(dnphigh,sequence=edgeStyleWeights[::-1]) + dnphigh = xorBlock(dnphigh) + + imlow = Image.fromarray(dnplow,mode='RGB') + imhigh = Image.fromarray(dnphigh,mode='RGB') + + background = Image.new('RGB',(image.size[0]+imlow.size[0]+imhigh.size[0]+2,image.size[1]),(0,0,0)) + background.paste(imlow,(0,0)) + background.paste(image,(imlow.size[0]+1,0)) + background.paste(imhigh,(imlow.size[0]+1+image.size[0]+1,0)) + return background def crop_black(img,tol=0): @@ -62,19 +101,22 @@ def crop_black(img,tol=0): row_start,row_end = mask1.argmax(),mask.shape[0]-mask1[::-1].argmax() return img[row_start:row_end,col_start:col_end] -def extractImageDataFooter(image): +def extractImageDataEmbed(image): d=3 - outarr = crop_black(np.array(image.convert('RGB').getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) - lastRow = np.where( np.sum(outarr, axis=(1,2))==0) - if lastRow[0].shape[0] == 0: - print('Image data block not found.') + outarr = crop_black(np.array(image.getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) & 0x0F + blackCols = np.where( np.sum(outarr, axis=(0,2))==0) + if blackCols[0].shape[0] < 2: + print('No Image data blocks found.') return None - lastRow = lastRow[0] - - lastRow = lastRow.max() - dataBlock = outarr[lastRow+1::].astype(np.uint8).flatten().tobytes() - print(lastRow) + dataBlocklower = outarr[:,:blackCols[0].min(),:].astype(np.uint8) + dataBlockupper = outarr[:,blackCols[0].max()+1:,:].astype(np.uint8) + + dataBlocklower = xorBlock(dataBlocklower) + dataBlockupper = xorBlock(dataBlockupper) + + dataBlock = (dataBlockupper << 4) | (dataBlocklower) + dataBlock = dataBlock.flatten().tobytes() data = zlib.decompress(dataBlock) return json.loads(data,cls=EmbeddingDecoder) @@ -154,7 +196,7 @@ class EmbeddingDatabase: data = embeddingFromB64(embed_image.text['sd-ti-embedding']) name = data.get('name',name) else: - data = extractImageDataFooter(embed_image) + data = extractImageDataEmbed(embed_image) name = data.get('name',name) else: data = torch.load(path, map_location="cpu") @@ -351,7 +393,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini footer_right = '{}'.format(embedding.step) captioned_image = captionImageOverlay(image,title,footer_left,footer_mid,footer_right) - captioned_image = appendImageDataFooter(captioned_image,data) + captioned_image = insertImageDataEmbed(captioned_image,data) captioned_image.save(last_saved_image_chunks, "PNG", pnginfo=info) -- cgit v1.2.1 From 767202a4c324f9b49f63ab4dabbb5736fe9df6e5 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Mon, 10 Oct 2022 23:20:52 +0100 Subject: add dependency --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 95eebea7..f3cacaa0 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -7,7 +7,7 @@ import tqdm import html import datetime -from PIL import Image,PngImagePlugin +from PIL import Image,PngImagePlugin,ImageDraw from ..images import captionImageOverlay import numpy as np import 
base64 -- cgit v1.2.1 From e0fbe6d27e7b4505766c8cb5a4264e1114cf3721 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Mon, 10 Oct 2022 23:26:24 +0100 Subject: colour depth conversion fix --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index f3cacaa0..ae807268 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -103,7 +103,7 @@ def crop_black(img,tol=0): def extractImageDataEmbed(image): d=3 - outarr = crop_black(np.array(image.getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) & 0x0F + outarr = crop_black(np.array(image.convert('RGB').getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) & 0x0F blackCols = np.where( np.sum(outarr, axis=(0,2))==0) if blackCols[0].shape[0] < 2: print('No Image data blocks found.') -- cgit v1.2.1 From 76ef3d75f61253516c024553335d9083d9660a8a Mon Sep 17 00:00:00 2001 From: JC_Array Date: Mon, 10 Oct 2022 18:01:49 -0500 Subject: added deepbooru settings (threshold and sort by alpha or likelyhood) --- modules/deepbooru.py | 36 +++++++++++++++++++++++++----------- modules/shared.py | 6 ++++++ 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/modules/deepbooru.py b/modules/deepbooru.py index ebdba5e0..e31e92c0 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -3,31 +3,32 @@ from concurrent.futures import ProcessPoolExecutor import multiprocessing import time - -def get_deepbooru_tags(pil_image, threshold=0.5): +def get_deepbooru_tags(pil_image): """ This method is for running only one image at a time for simple use. Used to the img2img interrogate. """ from modules import shared # prevents circular reference - create_deepbooru_process(threshold) + create_deepbooru_process(shared.opts.deepbooru_threshold, shared.opts.deepbooru_sort_alpha) shared.deepbooru_process_return["value"] = -1 shared.deepbooru_process_queue.put(pil_image) while shared.deepbooru_process_return["value"] == -1: time.sleep(0.2) + tags = shared.deepbooru_process_return["value"] release_process() + return tags -def deepbooru_process(queue, deepbooru_process_return, threshold): +def deepbooru_process(queue, deepbooru_process_return, threshold, alpha_sort): model, tags = get_deepbooru_tags_model() while True: # while process is running, keep monitoring queue for new image pil_image = queue.get() if pil_image == "QUIT": break else: - deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold) + deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold, alpha_sort) -def create_deepbooru_process(threshold=0.5): +def create_deepbooru_process(threshold, alpha_sort): """ Creates deepbooru process. A queue is created to send images into the process. This enables multiple images to be processed in a row without reloading the model or creating a new process. 
To return the data, a shared @@ -40,7 +41,7 @@ def create_deepbooru_process(threshold=0.5): shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue() shared.deepbooru_process_return = shared.deepbooru_process_manager.dict() shared.deepbooru_process_return["value"] = -1 - shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold)) + shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold, alpha_sort)) shared.deepbooru_process.start() @@ -80,7 +81,7 @@ def get_deepbooru_tags_model(): return model, tags -def get_deepbooru_tags_from_model(model, tags, pil_image, threshold=0.5): +def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, alpha_sort): import deepdanbooru as dd import tensorflow as tf import numpy as np @@ -105,15 +106,28 @@ def get_deepbooru_tags_from_model(model, tags, pil_image, threshold=0.5): for i, tag in enumerate(tags): result_dict[tag] = y[i] - result_tags_out = [] + + unsorted_tags_in_theshold = [] result_tags_print = [] for tag in tags: if result_dict[tag] >= threshold: if tag.startswith("rating:"): continue - result_tags_out.append(tag) + unsorted_tags_in_theshold.append((result_dict[tag], tag)) result_tags_print.append(f'{result_dict[tag]} {tag}') + # sort tags + result_tags_out = [] + sort_ndx = 0 + print(alpha_sort) + if alpha_sort: + sort_ndx = 1 + + # sort by reverse by likelihood and normal for alpha + unsorted_tags_in_theshold.sort(key=lambda y: y[sort_ndx], reverse=(not alpha_sort)) + for weight, tag in unsorted_tags_in_theshold: + result_tags_out.append(tag) + print('\n'.join(sorted(result_tags_print, reverse=True))) - return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ') \ No newline at end of file + return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ') diff --git a/modules/shared.py b/modules/shared.py index 1995a99a..2e307809 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -261,6 +261,12 @@ options_templates.update(options_section(('sampler-params', "Sampler parameters" 's_noise': OptionInfo(1.0, "sigma noise", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), })) +if cmd_opts.deepdanbooru: + options_templates.update(options_section(('deepbooru-params', "DeepBooru parameters"), { + "deepbooru_sort_alpha": OptionInfo(True, "Sort Alphabetical", gr.Checkbox), + 'deepbooru_threshold': OptionInfo(0.5, "Threshold", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), + })) + class Options: data = None -- cgit v1.2.1
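A standalone sketch of the threshold-and-sort behaviour these deepbooru commits wire through the worker (the tag names and scores are illustrative; the real ones come from the DeepDanbooru model):

scored = {"sky": 0.91, "cloud": 0.62, "rating:safe": 0.99, "tree": 0.40}
threshold, alpha_sort = 0.5, False

kept = [(score, tag) for tag, score in scored.items()
        if score >= threshold and not tag.startswith("rating:")]
# tuple index 1 sorts by tag name ascending; index 0 by score, descending
kept.sort(key=lambda pair: pair[alpha_sort], reverse=not alpha_sort)
print(", ".join(tag for _, tag in kept))  # -> sky, cloud

From bb932dbf9faf43ba918daa4791873078797b2a48 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Mon, 10 Oct 2022 18:37:52 -0500 Subject: added alpha sort and threshold variables to create process method in preprocessing --- modules/textual_inversion/preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 4a2194da..c0af729b 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -29,7 +29,7 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ shared.interrogator.load() if process_caption_deepbooru: - deepbooru.create_deepbooru_process() + deepbooru.create_deepbooru_process(opts.deepbooru_threshold, opts.deepbooru_sort_alpha) def save_pic_with_caption(image, index): if process_caption: --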
cgit v1.2.1 From 7aa8fcac1e45c3ad9c6a40df0e44a346afcd5032 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 04:17:36 +0100 Subject: use simple lcg in xor --- modules/textual_inversion/textual_inversion.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index ae807268..13416a08 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -39,9 +39,15 @@ def embeddingFromB64(data): d = base64.b64decode(data) return json.loads(d,cls=EmbeddingDecoder) +def lcg(m=2**32, a=1664525, c=1013904223, seed=0): + while True: + seed = (a * seed + c) % m + yield seed + def xorBlock(block): - return np.bitwise_xor(block.astype(np.uint8), - ((np.random.RandomState(0xDEADBEEF).random(block.shape)*255).astype(np.uint8)) & 0x0F ) + g = lcg() + randblock = np.array([next(g) for _ in range(np.product(block.shape))]).astype(np.uint8).reshape(block.shape) + return np.bitwise_xor(block.astype(np.uint8),randblock & 0x0F) def styleBlock(block,sequence): im = Image.new('RGB',(block.shape[1],block.shape[0])) -- cgit v1.2.1
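Swapping NumPy's RandomState for a hand-rolled linear congruential generator pins the keystream to fixed integer arithmetic, so the 4-bit XOR scramble decodes identically on any NumPy version. A standalone sketch of why the scramble is reversible (this commit yields the raw seed and relies on the uint8 cast; the consolidated image_embedding.py below yields seed % 255 instead, and either way the stream is deterministic, which is what makes XOR its own inverse):

import numpy as np

def lcg(m=2**32, a=1664525, c=1013904223, seed=0):
    # same numerical-recipes constants as the patch
    while True:
        seed = (a * seed + c) % m
        yield seed % 255

def keystream(shape):
    g = lcg()
    flat = [next(g) for _ in range(int(np.prod(shape)))]
    return np.array(flat, dtype=np.uint8).reshape(shape) & 0x0F

block = np.random.randint(0, 16, size=(4, 4, 3), dtype=np.uint8)
scrambled = block ^ keystream(block.shape)
# the same deterministic stream undoes the XOR exactly
assert np.array_equal(scrambled ^ keystream(block.shape), block)

From ff4ef13dd591ec52f196f344f47537695df95364 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Tue, 11 Oct 2022 10:24:27 -0500 Subject: removed unneeded print --- modules/deepbooru.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/deepbooru.py b/modules/deepbooru.py index e31e92c0..89dcac3c 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -119,7 +119,6 @@ def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, alpha_sort) # sort tags result_tags_out = [] sort_ndx = 0 - print(alpha_sort) if alpha_sort: sort_ndx = 1 -- cgit v1.2.1 From c080f52ceae73b893155eff7de577aaf1a982a2f Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 19:37:58 +0100 Subject: move embedding logic to separate file --- modules/textual_inversion/image_embedding.py | 234 +++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 modules/textual_inversion/image_embedding.py diff --git a/modules/textual_inversion/image_embedding.py b/modules/textual_inversion/image_embedding.py new file mode 100644 index 00000000..6ad39602 --- /dev/null +++ b/modules/textual_inversion/image_embedding.py @@ -0,0 +1,234 @@ +import base64 +import json +import numpy as np +import zlib +from PIL import Image,PngImagePlugin,ImageDraw,ImageFont +from fonts.ttf import Roboto +import torch + +class EmbeddingEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, torch.Tensor): + return {'TORCHTENSOR':obj.cpu().detach().numpy().tolist()} + return json.JSONEncoder.default(self, obj) + +class EmbeddingDecoder(json.JSONDecoder): + def __init__(self, *args, **kwargs): + json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) + def object_hook(self, d): + if 'TORCHTENSOR' in d: + return torch.from_numpy(np.array(d['TORCHTENSOR'])) + return d + +def embedding_to_b64(data): + d = json.dumps(data,cls=EmbeddingEncoder) + return base64.b64encode(d.encode()) + +def embedding_from_b64(data): + d = base64.b64decode(data) + return json.loads(d,cls=EmbeddingDecoder) + +def lcg(m=2**32, a=1664525, c=1013904223, seed=0): + while True: + seed = (a * seed + c) % m + yield seed%255 + +def xor_block(block): + g = lcg() + randblock = np.array([next(g) for _ in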
range(np.product(block.shape))]).astype(np.uint8).reshape(block.shape) + return np.bitwise_xor(block.astype(np.uint8),randblock & 0x0F) + +def style_block(block,sequence): + im = Image.new('RGB',(block.shape[1],block.shape[0])) + draw = ImageDraw.Draw(im) + i=0 + for x in range(-6,im.size[0],8): + for yi,y in enumerate(range(-6,im.size[1],8)): + offset=0 + if yi%2==0: + offset=4 + shade = sequence[i%len(sequence)] + i+=1 + draw.ellipse((x+offset, y, x+6+offset, y+6), fill =(shade,shade,shade) ) + + fg = np.array(im).astype(np.uint8) & 0xF0 + + return block ^ fg + +def insert_image_data_embed(image,data): + d = 3 + data_compressed = zlib.compress( json.dumps(data,cls=EmbeddingEncoder).encode(),level=9) + data_np_ = np.frombuffer(data_compressed,np.uint8).copy() + data_np_high = data_np_ >> 4 + data_np_low = data_np_ & 0x0F + + h = image.size[1] + next_size = data_np_low.shape[0] + (h-(data_np_low.shape[0]%h)) + next_size = next_size + ((h*d)-(next_size%(h*d))) + + data_np_low.resize(next_size) + data_np_low = data_np_low.reshape((h,-1,d)) + + data_np_high.resize(next_size) + data_np_high = data_np_high.reshape((h,-1,d)) + + edge_style = list(data['string_to_param'].values())[0].cpu().detach().numpy().tolist()[0][:1024] + edge_style = (np.abs(edge_style)/np.max(np.abs(edge_style))*255).astype(np.uint8) + + data_np_low = style_block(data_np_low,sequence=edge_style) + data_np_low = xor_block(data_np_low) + data_np_high = style_block(data_np_high,sequence=edge_style[::-1]) + data_np_high = xor_block(data_np_high) + + im_low = Image.fromarray(data_np_low,mode='RGB') + im_high = Image.fromarray(data_np_high,mode='RGB') + + background = Image.new('RGB',(image.size[0]+im_low.size[0]+im_high.size[0]+2,image.size[1]),(0,0,0)) + background.paste(im_low,(0,0)) + background.paste(image,(im_low.size[0]+1,0)) + background.paste(im_high,(im_low.size[0]+1+image.size[0]+1,0)) + + return background + +def crop_black(img,tol=0): + mask = (img>tol).all(2) + mask0,mask1 = mask.any(0),mask.any(1) + col_start,col_end = mask0.argmax(),mask.shape[1]-mask0[::-1].argmax() + row_start,row_end = mask1.argmax(),mask.shape[0]-mask1[::-1].argmax() + return img[row_start:row_end,col_start:col_end] + +def extract_image_data_embed(image): + d=3 + outarr = crop_black(np.array(image.convert('RGB').getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) & 0x0F + black_cols = np.where( np.sum(outarr, axis=(0,2))==0) + if black_cols[0].shape[0] < 2: + print('No Image data blocks found.') + return None + + data_block_lower = outarr[:,:black_cols[0].min(),:].astype(np.uint8) + data_block_upper = outarr[:,black_cols[0].max()+1:,:].astype(np.uint8) + + data_block_lower = xor_block(data_block_lower) + data_block_upper = xor_block(data_block_upper) + + data_block = (data_block_upper << 4) | (data_block_lower) + data_block = data_block.flatten().tobytes() + + data = zlib.decompress(data_block) + return json.loads(data,cls=EmbeddingDecoder) + +def addCaptionLines(lines,image,initialx,textfont): + draw = ImageDraw.Draw(image) + hstart =initialx + for fill,line in lines: + fontsize = 32 + font = ImageFont.truetype(textfont, fontsize) + _,_,w, h = draw.textbbox((0,0),line,font=font) + fontsize = min( int(fontsize * ((image.size[0]-35)/w) ), 28) + font = ImageFont.truetype(textfont, fontsize) + _,_,w,h = draw.textbbox((0,0),line,font=font) + draw.text(((image.size[0]-w)/2,hstart), line, font=font, fill=fill) + hstart += h + return hstart + +def caption_image(image,prelines,postlines,background=(51, 51, 51),font=None): + if font 
is None: + try: + font = ImageFont.truetype(opts.font or Roboto, fontsize) + font = opts.font or Roboto + except Exception: + font = Roboto + + sample_image = image + background = Image.new("RGBA", (sample_image.size[0],sample_image.size[1]+1024), background) + hoffset = addCaptionLines(prelines,background,5,font)+16 + background.paste(sample_image,(0,hoffset)) + hoffset = hoffset+sample_image.size[1]+8 + hoffset = addCaptionLines(postlines,background,hoffset,font) + background = background.crop((0,0,sample_image.size[0],hoffset+8)) + return background + +def caption_image_overlay(srcimage,title,footerLeft,footerMid,footerRight,textfont=None): + from math import cos + + image = srcimage.copy() + + if textfont is None: + try: + textfont = ImageFont.truetype(opts.font or Roboto, fontsize) + textfont = opts.font or Roboto + except Exception: + textfont = Roboto + + factor = 1.5 + gradient = Image.new('RGBA', (1,image.size[1]), color=(0,0,0,0)) + for y in range(image.size[1]): + mag = 1-cos(y/image.size[1]*factor) + mag = max(mag,1-cos((image.size[1]-y)/image.size[1]*factor*1.1)) + gradient.putpixel((0, y), (0,0,0,int(mag*255))) + image = Image.alpha_composite(image.convert('RGBA'), gradient.resize(image.size)) + + draw = ImageDraw.Draw(image) + fontsize = 32 + font = ImageFont.truetype(textfont, fontsize) + padding = 10 + + _,_,w, h = draw.textbbox((0,0),title,font=font) + fontsize = min( int(fontsize * (((image.size[0]*0.75)-(padding*4))/w) ), 72) + font = ImageFont.truetype(textfont, fontsize) + _,_,w,h = draw.textbbox((0,0),title,font=font) + draw.text((padding,padding), title, anchor='lt', font=font, fill=(255,255,255,230)) + + _,_,w, h = draw.textbbox((0,0),footerLeft,font=font) + fontsize_left = min( int(fontsize * (((image.size[0]/3)-(padding))/w) ), 72) + _,_,w, h = draw.textbbox((0,0),footerMid,font=font) + fontsize_mid = min( int(fontsize * (((image.size[0]/3)-(padding))/w) ), 72) + _,_,w, h = draw.textbbox((0,0),footerRight,font=font) + fontsize_right = min( int(fontsize * (((image.size[0]/3)-(padding))/w) ), 72) + + font = ImageFont.truetype(textfont, min(fontsize_left,fontsize_mid,fontsize_right)) + + draw.text((padding,image.size[1]-padding), footerLeft, anchor='ls', font=font, fill=(255,255,255,230)) + draw.text((image.size[0]/2,image.size[1]-padding), footerMid, anchor='ms', font=font, fill=(255,255,255,230)) + draw.text((image.size[0]-padding,image.size[1]-padding), footerRight, anchor='rs', font=font, fill=(255,255,255,230)) + + return image + +if __name__ == '__main__': + + image = Image.new('RGBA',(512,512),(255,255,200,255)) + caption_image(image,[((255,255,255),'line a'),((255,255,255),'line b')], + [((255,255,255),'line c'),((255,255,255),'line d')]) + + image = Image.new('RGBA',(512,512),(255,255,200,255)) + cap_image = caption_image_overlay(image, 'title', 'footerLeft', 'footerMid', 'footerRight') + + test_embed = {'string_to_param':{'*':torch.from_numpy(np.random.random((2, 4096)))}} + + embedded_image = insert_image_data_embed(cap_image, test_embed) + + retrived_embed = extract_image_data_embed(embedded_image) + + assert str(retrived_embed) == str(test_embed) + + embedded_image2 = insert_image_data_embed(cap_image, retrived_embed) + + assert embedded_image == embedded_image2 + + g = lcg() + shared_random = np.array([next(g) for _ in range(100)]).astype(np.uint8).tolist() + + reference_random = [253, 242, 127, 44, 157, 27, 239, 133, 38, 79, 167, 4, 177, + 95, 130, 79, 78, 14, 52, 215, 220, 194, 126, 28, 240, 179, + 160, 153, 149, 50, 105, 14, 21, 218, 199, 18, 54, 
198, 193, + 38, 128, 19, 53, 195, 124, 75, 205, 12, 6, 145, 0, 28, + 30, 148, 8, 45, 218, 171, 55, 249, 97, 166, 12, 35, 0, + 41, 221, 122, 215, 170, 31, 113, 186, 97, 119, 31, 23, 185, + 66, 140, 30, 41, 37, 63, 137, 109, 216, 55, 159, 145, 82, + 204, 86, 73, 222, 44, 198, 118, 240, 97] + + assert shared_random == reference_random + + hunna_kay_random_sum = sum(np.array([next(g) for _ in range(100000)]).astype(np.uint8).tolist()) + + assert 12731374 == hunna_kay_random_sum \ No newline at end of file -- cgit v1.2.1
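The new module's self-test exercises the whole pipeline end to end. Its core serialization trick is the pair of JSON hooks that tunnel torch tensors through plain JSON before compression and base64 encoding. A standalone round-trip sketch of that idea (the TensorEncoder/tensor_hook names are illustrative; the module's own classes are EmbeddingEncoder and EmbeddingDecoder):

import base64
import json
import numpy as np
import torch

class TensorEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, torch.Tensor):
            return {'TORCHTENSOR': obj.cpu().detach().numpy().tolist()}
        return json.JSONEncoder.default(self, obj)

def tensor_hook(d):
    # inverse of TensorEncoder: rebuild tensors from the tagged lists
    if 'TORCHTENSOR' in d:
        return torch.from_numpy(np.array(d['TORCHTENSOR']))
    return d

data = {'string_to_param': {'*': torch.zeros(2, 4, dtype=torch.float64)}, 'name': 'demo'}
b64 = base64.b64encode(json.dumps(data, cls=TensorEncoder).encode())
restored = json.loads(base64.b64decode(b64), object_hook=tensor_hook)
assert torch.equal(restored['string_to_param']['*'], data['string_to_param']['*'])

From e5fbf5c755b7c306696546405385d5d2314e555b Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 19:46:33 +0100 Subject: remove embedding related image functions from images --- modules/images.py | 77 ------------------------------------------------------- 1 file changed, 77 deletions(-) diff --git a/modules/images.py b/modules/images.py index e62eec8e..c0a90676 100644 --- a/modules/images.py +++ b/modules/images.py @@ -463,80 +463,3 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i txt_fullfn = None return fullfn, txt_fullfn - -def addCaptionLines(lines,image,initialx,textfont): - draw = ImageDraw.Draw(image) - hstart =initialx - for fill,line in lines: - fontSize = 32 - font = ImageFont.truetype(textfont, fontSize) - _,_,w, h = draw.textbbox((0,0),line,font=font) - fontSize = min( int(fontSize * ((image.size[0]-35)/w) ), 28) - font = ImageFont.truetype(textfont, fontSize) - _,_,w,h = draw.textbbox((0,0),line,font=font) - draw.text(((image.size[0]-w)/2,hstart), line, font=font, fill=fill) - hstart += h - return hstart - -def captionImge(image,prelines,postlines,background=(51, 51, 51),font=None): - if font is None: - try: - font = ImageFont.truetype(opts.font or Roboto, fontsize) - font = opts.font or Roboto - except Exception: - font = Roboto - - sampleImage = image - background = Image.new("RGBA", (sampleImage.size[0],sampleImage.size[1]+1024), background) - hoffset = addCaptionLines(prelines,background,5,font)+16 - background.paste(sampleImage,(0,hoffset)) - hoffset = hoffset+sampleImage.size[1]+8 - hoffset = addCaptionLines(postlines,background,hoffset,font) - background = background.crop((0,0,sampleImage.size[0],hoffset+8)) - return background - -def captionImageOverlay(srcimage,title,footerLeft,footerMid,footerRight,textfont=None): - from math import cos - - image = srcimage.copy() - - if textfont is None: - try: - textfont = ImageFont.truetype(opts.font or Roboto, fontsize) - textfont = opts.font or Roboto - except Exception: - textfont = Roboto - - factor = 1.5 - gradient = Image.new('RGBA', (1,image.size[1]), color=(0,0,0,0)) - for y in range(image.size[1]): - mag = 1-cos(y/image.size[1]*factor) - mag = max(mag,1-cos((image.size[1]-y)/image.size[1]*factor*1.1)) - gradient.putpixel((0, y), (0,0,0,int(mag*255))) - image = Image.alpha_composite(image.convert('RGBA'), gradient.resize(image.size)) - - draw = ImageDraw.Draw(image) - fontSize = 32 - font = ImageFont.truetype(textfont, fontSize) - padding = 10 - - _,_,w, h = draw.textbbox((0,0),title,font=font) - fontSize = min( int(fontSize * (((image.size[0]*0.75)-(padding*4))/w) ), 72) - font = ImageFont.truetype(textfont, fontSize) - _,_,w,h = draw.textbbox((0,0),title,font=font) - draw.text((padding,padding), title, anchor='lt', font=font, fill=(255,255,255,230)) - - _,_,w, h = draw.textbbox((0,0),footerLeft,font=font) - fontSizeleft = min( int(fontSize * (((image.size[0]/3)-(padding))/w) ), 72) - _,_,w, h =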
draw.textbbox((0,0),footerMid,font=font) - fontSizemid = min( int(fontSize * (((image.size[0]/3)-(padding))/w) ), 72) - _,_,w, h = draw.textbbox((0,0),footerRight,font=font) - fontSizeright = min( int(fontSize * (((image.size[0]/3)-(padding))/w) ), 72) - - font = ImageFont.truetype(textfont, min(fontSizeleft,fontSizemid,fontSizeright)) - - draw.text((padding,image.size[1]-padding), footerLeft, anchor='ls', font=font, fill=(255,255,255,230)) - draw.text((image.size[0]/2,image.size[1]-padding), footerMid, anchor='ms', font=font, fill=(255,255,255,230)) - draw.text((image.size[0]-padding,image.size[1]-padding), footerRight, anchor='rs', font=font, fill=(255,255,255,230)) - - return image -- cgit v1.2.1 From 61788c0538415fa9ca1dd1b306519c116b18bd2c Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 19:50:50 +0100 Subject: shift embedding logic out of textual_inversion --- modules/textual_inversion/textual_inversion.py | 125 ++----------------------- 1 file changed, 6 insertions(+), 119 deletions(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 8c66aeb5..22b4ae7f 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -7,124 +7,11 @@ import tqdm import html import datetime -from PIL import Image,PngImagePlugin,ImageDraw -from ..images import captionImageOverlay -import numpy as np -import base64 -import json -import zlib +from PIL import Image,PngImagePlugin from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset -class EmbeddingEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, torch.Tensor): - return {'TORCHTENSOR':obj.cpu().detach().numpy().tolist()} - return json.JSONEncoder.default(self, obj) - -class EmbeddingDecoder(json.JSONDecoder): - def __init__(self, *args, **kwargs): - json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) - def object_hook(self, d): - if 'TORCHTENSOR' in d: - return torch.from_numpy(np.array(d['TORCHTENSOR'])) - return d - -def embeddingToB64(data): - d = json.dumps(data,cls=EmbeddingEncoder) - return base64.b64encode(d.encode()) - -def embeddingFromB64(data): - d = base64.b64decode(data) - return json.loads(d,cls=EmbeddingDecoder) - -def lcg(m=2**32, a=1664525, c=1013904223, seed=0): - while True: - seed = (a * seed + c) % m - yield seed - -def xorBlock(block): - g = lcg() - randblock = np.array([next(g) for _ in range(np.product(block.shape))]).astype(np.uint8).reshape(block.shape) - return np.bitwise_xor(block.astype(np.uint8),randblock & 0x0F) - -def styleBlock(block,sequence): - im = Image.new('RGB',(block.shape[1],block.shape[0])) - draw = ImageDraw.Draw(im) - i=0 - for x in range(-6,im.size[0],8): - for yi,y in enumerate(range(-6,im.size[1],8)): - offset=0 - if yi%2==0: - offset=4 - shade = sequence[i%len(sequence)] - i+=1 - draw.ellipse((x+offset, y, x+6+offset, y+6), fill =(shade,shade,shade) ) - - fg = np.array(im).astype(np.uint8) & 0xF0 - return block ^ fg - -def insertImageDataEmbed(image,data): - d = 3 - data_compressed = zlib.compress( json.dumps(data,cls=EmbeddingEncoder).encode(),level=9) - dnp = np.frombuffer(data_compressed,np.uint8).copy() - dnphigh = dnp >> 4 - dnplow = dnp & 0x0F - - h = image.size[1] - next_size = dnplow.shape[0] + (h-(dnplow.shape[0]%h)) - next_size = next_size + ((h*d)-(next_size%(h*d))) - - dnplow.resize(next_size) - dnplow = 
dnplow.reshape((h,-1,d)) - - dnphigh.resize(next_size) - dnphigh = dnphigh.reshape((h,-1,d)) - - edgeStyleWeights = list(data['string_to_param'].values())[0].cpu().detach().numpy().tolist()[0][:1024] - edgeStyleWeights = (np.abs(edgeStyleWeights)/np.max(np.abs(edgeStyleWeights))*255).astype(np.uint8) - - dnplow = styleBlock(dnplow,sequence=edgeStyleWeights) - dnplow = xorBlock(dnplow) - dnphigh = styleBlock(dnphigh,sequence=edgeStyleWeights[::-1]) - dnphigh = xorBlock(dnphigh) - - imlow = Image.fromarray(dnplow,mode='RGB') - imhigh = Image.fromarray(dnphigh,mode='RGB') - - background = Image.new('RGB',(image.size[0]+imlow.size[0]+imhigh.size[0]+2,image.size[1]),(0,0,0)) - background.paste(imlow,(0,0)) - background.paste(image,(imlow.size[0]+1,0)) - background.paste(imhigh,(imlow.size[0]+1+image.size[0]+1,0)) - - return background - -def crop_black(img,tol=0): - mask = (img>tol).all(2) - mask0,mask1 = mask.any(0),mask.any(1) - col_start,col_end = mask0.argmax(),mask.shape[1]-mask0[::-1].argmax() - row_start,row_end = mask1.argmax(),mask.shape[0]-mask1[::-1].argmax() - return img[row_start:row_end,col_start:col_end] - -def extractImageDataEmbed(image): - d=3 - outarr = crop_black(np.array(image.convert('RGB').getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) & 0x0F - blackCols = np.where( np.sum(outarr, axis=(0,2))==0) - if blackCols[0].shape[0] < 2: - print('No Image data blocks found.') - return None - - dataBlocklower = outarr[:,:blackCols[0].min(),:].astype(np.uint8) - dataBlockupper = outarr[:,blackCols[0].max()+1:,:].astype(np.uint8) - - dataBlocklower = xorBlock(dataBlocklower) - dataBlockupper = xorBlock(dataBlockupper) - - dataBlock = (dataBlockupper << 4) | (dataBlocklower) - dataBlock = dataBlock.flatten().tobytes() - data = zlib.decompress(dataBlock) - return json.loads(data,cls=EmbeddingDecoder) class Embedding: def __init__(self, vec, name, step=None): @@ -199,10 +86,10 @@ class EmbeddingDatabase: if filename.upper().endswith('.PNG'): embed_image = Image.open(path) if 'sd-ti-embedding' in embed_image.text: - data = embeddingFromB64(embed_image.text['sd-ti-embedding']) + data = embedding_from_b64(embed_image.text['sd-ti-embedding']) name = data.get('name',name) else: - data = extractImageDataEmbed(embed_image) + data = extract_image_data_embed(embed_image) name = data.get('name',name) else: data = torch.load(path, map_location="cpu") @@ -393,7 +280,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini info = PngImagePlugin.PngInfo() data = torch.load(last_saved_file) - info.add_text("sd-ti-embedding", embeddingToB64(data)) + info.add_text("sd-ti-embedding", embedding_to_b64(data)) title = "<{}>".format(data.get('name','???')) checkpoint = sd_models.select_checkpoint() @@ -401,8 +288,8 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini footer_mid = '[{}]'.format(checkpoint.hash) footer_right = '{}'.format(embedding.step) - captioned_image = captionImageOverlay(image,title,footer_left,footer_mid,footer_right) - captioned_image = insertImageDataEmbed(captioned_image,data) + captioned_image = caption_image_overlay(image,title,footer_left,footer_mid,footer_right) + captioned_image = insert_image_data_embed(captioned_image,data) captioned_image.save(last_saved_image_chunks, "PNG", pnginfo=info) -- cgit v1.2.1 From db71290d2659d3b58ff9b57a82e4721a9eab9229 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 19:55:54 +0100 Subject: remove old caption 
method --- modules/textual_inversion/image_embedding.py | 39 ++-------------------------- 1 file changed, 2 insertions(+), 37 deletions(-) diff --git a/modules/textual_inversion/image_embedding.py b/modules/textual_inversion/image_embedding.py index 6ad39602..c67028a5 100644 --- a/modules/textual_inversion/image_embedding.py +++ b/modules/textual_inversion/image_embedding.py @@ -117,37 +117,6 @@ def extract_image_data_embed(image): data = zlib.decompress(data_block) return json.loads(data,cls=EmbeddingDecoder) -def addCaptionLines(lines,image,initialx,textfont): - draw = ImageDraw.Draw(image) - hstart =initialx - for fill,line in lines: - fontsize = 32 - font = ImageFont.truetype(textfont, fontsize) - _,_,w, h = draw.textbbox((0,0),line,font=font) - fontsize = min( int(fontsize * ((image.size[0]-35)/w) ), 28) - font = ImageFont.truetype(textfont, fontsize) - _,_,w,h = draw.textbbox((0,0),line,font=font) - draw.text(((image.size[0]-w)/2,hstart), line, font=font, fill=fill) - hstart += h - return hstart - -def caption_image(image,prelines,postlines,background=(51, 51, 51),font=None): - if font is None: - try: - font = ImageFont.truetype(opts.font or Roboto, fontsize) - font = opts.font or Roboto - except Exception: - font = Roboto - - sample_image = image - background = Image.new("RGBA", (sample_image.size[0],sample_image.size[1]+1024), background) - hoffset = addCaptionLines(prelines,background,5,font)+16 - background.paste(sample_image,(0,hoffset)) - hoffset = hoffset+sample_image.size[1]+8 - hoffset = addCaptionLines(postlines,background,hoffset,font) - background = background.crop((0,0,sample_image.size[0],hoffset+8)) - return background - def caption_image_overlay(srcimage,title,footerLeft,footerMid,footerRight,textfont=None): from math import cos @@ -195,11 +164,7 @@ def caption_image_overlay(srcimage,title,footerLeft,footerMid,footerRight,textfo return image if __name__ == '__main__': - - image = Image.new('RGBA',(512,512),(255,255,200,255)) - caption_image(image,[((255,255,255),'line a'),((255,255,255),'line b')], - [((255,255,255),'line c'),((255,255,255),'line d')]) - + image = Image.new('RGBA',(512,512),(255,255,200,255)) cap_image = caption_image_overlay(image, 'title', 'footerLeft', 'footerMid', 'footerRight') @@ -231,4 +196,4 @@ if __name__ == '__main__': hunna_kay_random_sum = sum(np.array([next(g) for _ in range(100000)]).astype(np.uint8).tolist()) - assert 12731374 == hunna_kay_random_sum \ No newline at end of file + assert 12731374 == hunna_kay_random_sum -- cgit v1.2.1 From aa75d5cfe8c84768b0f5d16f977ddba298677379 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 20:06:13 +0100 Subject: correct conflict resolution typo --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 22b4ae7f..789383ce 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -169,7 +169,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): -def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, num_repeats, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_image_prompt) +def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, num_repeats, 
create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_image_prompt): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." -- cgit v1.2.1 From 91d7ee0d097a7ea203d261b570cd2b834837d9e2 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 20:09:10 +0100 Subject: update imports --- modules/textual_inversion/textual_inversion.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 789383ce..ff0a62b3 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -12,6 +12,9 @@ from PIL import Image,PngImagePlugin from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset +from modules.textual_inversion.image_embedding import( embedding_to_b64,embedding_from_b64, + insert_image_data_embed,extract_image_data_embed, + caption_image_overlay ) class Embedding: def __init__(self, vec, name, step=None): -- cgit v1.2.1 From 5f3317376bb7952bc5145f05f16c1bbd466efc85 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 20:09:49 +0100 Subject: spacing --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index ff0a62b3..485ef46c 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -12,7 +12,7 @@ from PIL import Image,PngImagePlugin from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset -from modules.textual_inversion.image_embedding import( embedding_to_b64,embedding_from_b64, +from modules.textual_inversion.image_embedding import (embedding_to_b64,embedding_from_b64, insert_image_data_embed,extract_image_data_embed, caption_image_overlay ) -- cgit v1.2.1 From 7e6a6e00ad6f3b7ef43c8120db9ecac6e8d6bea5 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 20:20:46 +0100 Subject: Add files via upload --- modules/textual_inversion/test_embedding.png | Bin 0 -> 489220 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 modules/textual_inversion/test_embedding.png diff --git a/modules/textual_inversion/test_embedding.png b/modules/textual_inversion/test_embedding.png new file mode 100644 index 00000000..07e2d9af Binary files /dev/null and b/modules/textual_inversion/test_embedding.png differ -- cgit v1.2.1 From 66ec505975aaa305a217fc27281ce368cbaef281 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 20:21:30 +0100 Subject: add file based test --- modules/textual_inversion/image_embedding.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/modules/textual_inversion/image_embedding.py b/modules/textual_inversion/image_embedding.py index c67028a5..1224fb42 100644 --- a/modules/textual_inversion/image_embedding.py +++ b/modules/textual_inversion/image_embedding.py @@ -164,6 +164,14 @@ def caption_image_overlay(srcimage,title,footerLeft,footerMid,footerRight,textfo return image if __name__ == '__main__': + + testEmbed = Image.open('test_embedding.png') + + data = extract_image_data_embed(testEmbed) + assert 
data is not None + + data = embedding_from_b64(testEmbed.text['sd-ti-embedding']) + assert data is not None image = Image.new('RGBA',(512,512),(255,255,200,255)) cap_image = caption_image_overlay(image, 'title', 'footerLeft', 'footerMid', 'footerRight') -- cgit v1.2.1 From f53f703aebc801c4204182d52bb1e0bef9808e1f Mon Sep 17 00:00:00 2001 From: JC_Array Date: Tue, 11 Oct 2022 18:12:12 -0500 Subject: resolved conflicts, moved settings under interrogate section, settings only show if deepbooru flag is enabled --- modules/deepbooru.py | 2 +- modules/shared.py | 19 +++++++++---------- modules/textual_inversion/preprocess.py | 2 +- modules/ui.py | 2 +- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 89dcac3c..29529949 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -8,7 +8,7 @@ def get_deepbooru_tags(pil_image): This method is for running only one image at a time for simple use. Used to the img2img interrogate. """ from modules import shared # prevents circular reference - create_deepbooru_process(shared.opts.deepbooru_threshold, shared.opts.deepbooru_sort_alpha) + create_deepbooru_process(shared.opts.interrogate_deepbooru_score_threshold, shared.opts.deepbooru_sort_alpha) shared.deepbooru_process_return["value"] = -1 shared.deepbooru_process_queue.put(pil_image) while shared.deepbooru_process_return["value"] == -1: diff --git a/modules/shared.py b/modules/shared.py index 817203f8..5456c477 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -248,15 +248,20 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), { "random_artist_categories": OptionInfo([], "Allowed categories for random artists selection when using the Roll button", gr.CheckboxGroup, {"choices": artist_db.categories()}), })) -options_templates.update(options_section(('interrogate', "Interrogate Options"), { +interrogate_option_dictionary = { "interrogate_keep_models_in_memory": OptionInfo(False, "Interrogate: keep models in VRAM"), "interrogate_use_builtin_artists": OptionInfo(True, "Interrogate: use artists from artists.csv"), "interrogate_clip_num_beams": OptionInfo(1, "Interrogate: num_beams for BLIP", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1}), "interrogate_clip_min_length": OptionInfo(24, "Interrogate: minimum description length (excluding artists, etc..)", gr.Slider, {"minimum": 1, "maximum": 128, "step": 1}), "interrogate_clip_max_length": OptionInfo(48, "Interrogate: maximum description length", gr.Slider, {"minimum": 1, "maximum": 256, "step": 1}), - "interrogate_clip_dict_limit": OptionInfo(1500, "Interrogate: maximum number of lines in text file (0 = No limit)"), - "interrogate_deepbooru_score_threshold": OptionInfo(0.5, "Interrogate: deepbooru score threshold", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}), -})) + "interrogate_clip_dict_limit": OptionInfo(1500, "Interrogate: maximum number of lines in text file (0 = No limit)") +} + +if cmd_opts.deepdanbooru: + interrogate_option_dictionary["interrogate_deepbooru_score_threshold"] = OptionInfo(0.5, "Interrogate: deepbooru score threshold", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}) + interrogate_option_dictionary["deepbooru_sort_alpha"] = OptionInfo(True, "Interrogate: deepbooru sort alphabetically", gr.Checkbox) + +options_templates.update(options_section(('interrogate', "Interrogate Options"), interrogate_option_dictionary)) options_templates.update(options_section(('ui', "User interface"), { "show_progressbar": OptionInfo(True, 
"Show progressbar"), @@ -282,12 +287,6 @@ options_templates.update(options_section(('sampler-params', "Sampler parameters" 'eta_noise_seed_delta': OptionInfo(0, "Eta noise seed delta", gr.Number, {"precision": 0}), })) -if cmd_opts.deepdanbooru: - options_templates.update(options_section(('deepbooru-params', "DeepBooru parameters"), { - "deepbooru_sort_alpha": OptionInfo(True, "Sort Alphabetical", gr.Checkbox), - 'deepbooru_threshold': OptionInfo(0.5, "Threshold", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), - })) - class Options: data = None diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index a96388d6..113cecf1 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -29,7 +29,7 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ shared.interrogator.load() if process_caption_deepbooru: - deepbooru.create_deepbooru_process(opts.deepbooru_threshold, opts.deepbooru_sort_alpha) + deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, opts.deepbooru_sort_alpha) def save_pic_with_caption(image, index): if process_caption: diff --git a/modules/ui.py b/modules/ui.py index 2891fc8c..fa45edca 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -317,7 +317,7 @@ def interrogate(image): def interrogate_deepbooru(image): - prompt = get_deepbooru_tags(image, opts.interrogate_deepbooru_score_threshold) + prompt = get_deepbooru_tags(image) return gr_show(True) if prompt is None else prompt -- cgit v1.2.1 From 6d408c06c634cc96480f055941754dcc43f781d9 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Wed, 12 Oct 2022 00:19:28 +0100 Subject: Prevent nans from failed float parsing from overwriting weights --- javascript/edit-attention.js | 1 + 1 file changed, 1 insertion(+) diff --git a/javascript/edit-attention.js b/javascript/edit-attention.js index 79566a2e..3f1d2fbb 100644 --- a/javascript/edit-attention.js +++ b/javascript/edit-attention.js @@ -25,6 +25,7 @@ addEventListener('keydown', (event) => { } else { end = target.value.slice(selectionEnd + 1).indexOf(")") + 1; weight = parseFloat(target.value.slice(selectionEnd + 1, selectionEnd + 1 + end)); + if (isNaN(weight)) return; if (event.key == minus) weight -= 0.1; if (event.key == plus) weight += 0.1; -- cgit v1.2.1 From 65b973ac4e547a325f30a05f852b161421af2041 Mon Sep 17 00:00:00 2001 From: supersteve3d <39339941+supersteve3d@users.noreply.github.com> Date: Wed, 12 Oct 2022 08:21:52 +0800 Subject: Update shared.py Correct typo to "Unload VAE and CLIP from VRAM when training" in settings tab. 
--- modules/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/shared.py b/modules/shared.py index c1092ff7..46bc740c 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -229,7 +229,7 @@ options_templates.update(options_section(('system', "System"), { })) options_templates.update(options_section(('training', "Training"), { - "unload_models_when_training": OptionInfo(False, "Unload VAE and CLIP form VRAM when training"), + "unload_models_when_training": OptionInfo(False, "Unload VAE and CLIP from VRAM when training"), })) options_templates.update(options_section(('sd', "Stable Diffusion"), { -- cgit v1.2.1 From 6ac2ec2b78bc5fabd09cb866dd9a71061d669269 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 07:01:20 +0300 Subject: create dir for hypernetworks --- modules/shared.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/shared.py b/modules/shared.py index c1092ff7..e65e77f8 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -86,6 +86,7 @@ parallel_processing_allowed = not cmd_opts.lowvram and not cmd_opts.medvram xformers_available = False config_filename = cmd_opts.ui_settings_file +os.makedirs(cmd_opts.hypernetwork_dir, exist_ok=True) hypernetworks = hypernetwork.list_hypernetworks(cmd_opts.hypernetwork_dir) loaded_hypernetwork = None -- cgit v1.2.1 From fec2221eeaafb50afd26ba3e109bf6f928011e69 Mon Sep 17 00:00:00 2001 From: Greg Fuller Date: Tue, 11 Oct 2022 19:29:38 -0700 Subject: Truncate error text to fix service lockup / stall What: * Update wrap_gradio_call to add a limit to the maximum amount of text output Why: * wrap_gradio_call currently prints out a list of the arguments provided to the failing function. * if that function is save_image, this causes the entire image to be printed to stderr * If the image is large, this can cause the service to lock up while attempting to print all the text * It is easy to generate large images using the x/y plot script * it is easy to encounter image save exceptions, including if the output directory does not exist / cannot be written to, or if the file is too big * The huge amount of log spam is confusing and not particularly helpful --- modules/ui.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/modules/ui.py b/modules/ui.py index 1204eef7..33a49d3b 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -181,8 +181,15 @@ def wrap_gradio_call(func, extra_outputs=None): try: res = list(func(*args, **kwargs)) except Exception as e: + # When printing out our debug argument list, do not print out more than a MB of text + max_debug_str_len = 131072 # (1024*1024)/8 + print("Error completing request", file=sys.stderr) - print("Arguments:", args, kwargs, file=sys.stderr) + argStr = f"Arguments: {str(args)} {str(kwargs)}" + print(argStr[:max_debug_str_len], file=sys.stderr) + if len(argStr) > max_debug_str_len: + print(f"(Argument list truncated at {max_debug_str_len}/{len(argStr)} characters)", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) shared.state.job = "" -- cgit v1.2.1
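A standalone sketch of the truncation guard this commit adds (the limit mirrors the patch; note that 131072 characters is 128 KiB, not the megabyte the in-code comment suggests):

import sys

def print_truncated(text, limit=131072):
    # print at most `limit` characters, then say how much was dropped
    print(text[:limit], file=sys.stderr)
    if len(text) > limit:
        print(f"(truncated at {limit}/{len(text)} characters)", file=sys.stderr)

print_truncated("Arguments: " + "x" * 200000)  # prints 128 KiB plus a notice

From 336bd8703c7b4d71f2f096f303599925a30b8167 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 09:00:07 +0300 Subject: just add the deepdanbooru settings unconditionally --- modules/shared.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index f150e024..42e99741 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -249,20 +249,15 @@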
options_templates.update(options_section(('sd', "Stable Diffusion"), { "random_artist_categories": OptionInfo([], "Allowed categories for random artists selection when using the Roll button", gr.CheckboxGroup, {"choices": artist_db.categories()}), })) -interrogate_option_dictionary = { +options_templates.update(options_section(('interrogate', "Interrogate Options"), { "interrogate_keep_models_in_memory": OptionInfo(False, "Interrogate: keep models in VRAM"), "interrogate_use_builtin_artists": OptionInfo(True, "Interrogate: use artists from artists.csv"), "interrogate_clip_num_beams": OptionInfo(1, "Interrogate: num_beams for BLIP", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1}), "interrogate_clip_min_length": OptionInfo(24, "Interrogate: minimum description length (excluding artists, etc..)", gr.Slider, {"minimum": 1, "maximum": 128, "step": 1}), "interrogate_clip_max_length": OptionInfo(48, "Interrogate: maximum description length", gr.Slider, {"minimum": 1, "maximum": 256, "step": 1}), - "interrogate_clip_dict_limit": OptionInfo(1500, "Interrogate: maximum number of lines in text file (0 = No limit)") -} - -if cmd_opts.deepdanbooru: - interrogate_option_dictionary["interrogate_deepbooru_score_threshold"] = OptionInfo(0.5, "Interrogate: deepbooru score threshold", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}) - interrogate_option_dictionary["deepbooru_sort_alpha"] = OptionInfo(True, "Interrogate: deepbooru sort alphabetically", gr.Checkbox) - -options_templates.update(options_section(('interrogate', "Interrogate Options"), interrogate_option_dictionary)) + "interrogate_deepbooru_score_threshold": OptionInfo(0.5, "Interrogate: deepbooru score threshold", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}), + "deepbooru_sort_alpha": OptionInfo(True, "Interrogate: deepbooru sort alphabetically"), +})) options_templates.update(options_section(('ui', "User interface"), { "show_progressbar": OptionInfo(True, "Show progressbar"), -- cgit v1.2.1 From fd07b103aeb70a80e3641068e483475e32c9750c Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 09:00:39 +0300 Subject: prevent SD model from loading when running in deepdanbooru process --- webui.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/webui.py b/webui.py index ca278e94..32bcdb06 100644 --- a/webui.py +++ b/webui.py @@ -31,12 +31,7 @@ from modules.paths import script_path from modules.shared import cmd_opts import modules.hypernetworks.hypernetwork -modelloader.cleanup_models() -modules.sd_models.setup_model() -codeformer.setup_model(cmd_opts.codeformer_models_path) -gfpgan.setup_model(cmd_opts.gfpgan_models_path) -shared.face_restorers.append(modules.face_restoration.FaceRestoration()) -modelloader.load_upscalers() + queue_lock = threading.Lock() @@ -78,12 +73,19 @@ def wrap_gradio_gpu_call(func, extra_outputs=None): return modules.ui.wrap_gradio_call(f, extra_outputs=extra_outputs) -modules.scripts.load_scripts(os.path.join(script_path, "scripts")) +def initialize(): + modelloader.cleanup_models() + modules.sd_models.setup_model() + codeformer.setup_model(cmd_opts.codeformer_models_path) + gfpgan.setup_model(cmd_opts.gfpgan_models_path) + shared.face_restorers.append(modules.face_restoration.FaceRestoration()) + modelloader.load_upscalers() -shared.sd_model = modules.sd_models.load_model() -shared.opts.onchange("sd_model_checkpoint", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(shared.sd_model))) + 
modules.scripts.load_scripts(os.path.join(script_path, "scripts")) -shared.opts.onchange("sd_hypernetwork", wrap_queued_call(lambda: modules.hypernetworks.hypernetwork.load_hypernetwork(shared.opts.sd_hypernetwork))) + shared.sd_model = modules.sd_models.load_model() + shared.opts.onchange("sd_model_checkpoint", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(shared.sd_model))) + shared.opts.onchange("sd_hypernetwork", wrap_queued_call(lambda: modules.hypernetworks.hypernetwork.load_hypernetwork(shared.opts.sd_hypernetwork))) def webui(): @@ -98,7 +100,7 @@ def webui(): demo = modules.ui.create_ui(wrap_gradio_gpu_call=wrap_gradio_gpu_call) - app,local_url,share_url = demo.launch( + app, local_url, share_url = demo.launch( share=cmd_opts.share, server_name="0.0.0.0" if cmd_opts.listen else None, server_port=cmd_opts.port, @@ -129,6 +131,6 @@ def webui(): print('Restarting Gradio') - if __name__ == "__main__": + initialize() webui() -- cgit v1.2.1 From 7edd58d90dd08f68fab5ff84d26dedd0eb85cae3 Mon Sep 17 00:00:00 2001 From: James Noeckel Date: Tue, 11 Oct 2022 17:48:24 -0700 Subject: update environment-wsl2.yaml --- environment-wsl2.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/environment-wsl2.yaml b/environment-wsl2.yaml index c9ce11df..f8872750 100644 --- a/environment-wsl2.yaml +++ b/environment-wsl2.yaml @@ -3,9 +3,9 @@ channels: - pytorch - defaults dependencies: - - python=3.8.5 - - pip=20.3 + - python=3.10 + - pip=22.2.2 - cudatoolkit=11.3 - - pytorch=1.11.0 - - torchvision=0.12.0 - - numpy=1.19.2 + - pytorch=1.12.1 + - torchvision=0.13.1 + - numpy=1.23.1 \ No newline at end of file -- cgit v1.2.1 From 8aead63f1ac9fec0e5198bd626ec2c5bcbeff4d8 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 09:32:14 +0300 Subject: emergency fix --- webui.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/webui.py b/webui.py index 32bcdb06..33ba7905 100644 --- a/webui.py +++ b/webui.py @@ -89,6 +89,8 @@ def initialize(): def webui(): + initialize() + # make the program just exit at ctrl+c without waiting for anything def sigint_handler(sig, frame): print(f'Interrupted with signal {sig} in {frame}') @@ -132,5 +134,4 @@ def webui(): if __name__ == "__main__": - initialize() webui() -- cgit v1.2.1 From 57e03cdd244eee4e33ccab7554b3594563a3d0cd Mon Sep 17 00:00:00 2001 From: brkirch Date: Wed, 12 Oct 2022 00:54:24 -0400 Subject: Ensure the directory exists before saving to it The directory for the images saved with the Save button may still not exist, so it needs to be created prior to opening the log.csv file. --- modules/ui.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/ui.py b/modules/ui.py index 00bf09ae..cd67b84b 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -131,6 +131,8 @@ def save_files(js_data, images, do_make_zip, index): images = [images[index]] start_index = index + os.makedirs(opts.outdir_save, exist_ok=True) + with open(os.path.join(opts.outdir_save, "log.csv"), "a", encoding="utf8", newline='') as file: at_start = file.tell() == 0 writer = csv.writer(file) -- cgit v1.2.1 From f421f2af2df41a86af1aea1e82b4c32a2d143385 Mon Sep 17 00:00:00 2001 From: aoirusann Date: Wed, 12 Oct 2022 13:02:28 +0800 Subject: [img2imgalt] Fix seed & Allow batch. 
--- scripts/img2imgalt.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/img2imgalt.py b/scripts/img2imgalt.py index f9894cb0..313a55d2 100644 --- a/scripts/img2imgalt.py +++ b/scripts/img2imgalt.py @@ -129,8 +129,6 @@ class Script(scripts.Script): return [original_prompt, original_negative_prompt, cfg, st, randomness, sigma_adjustment] def run(self, p, original_prompt, original_negative_prompt, cfg, st, randomness, sigma_adjustment): - p.batch_size = 1 - p.batch_count = 1 def sample_extra(conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): @@ -154,7 +152,7 @@ class Script(scripts.Script): rec_noise = find_noise_for_image(p, cond, uncond, cfg, st) self.cache = Cached(rec_noise, cfg, st, lat, original_prompt, original_negative_prompt, sigma_adjustment) - rand_noise = processing.create_random_tensors(p.init_latent.shape[1:], [p.seed + x + 1 for x in range(p.init_latent.shape[0])]) + rand_noise = processing.create_random_tensors(p.init_latent.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=p.subseed_strength, seed_resize_from_h=p.seed_resize_from_h, seed_resize_from_w=p.seed_resize_from_w, p=p) combined_noise = ((1 - randomness) * rec_noise + randomness * rand_noise) / ((randomness**2 + (1-randomness)**2) ** 0.5) -- cgit v1.2.1 From ca5efc316b9431746ff886d259275310f63f95fb Mon Sep 17 00:00:00 2001 From: LunixWasTaken Date: Tue, 11 Oct 2022 22:04:56 +0200 Subject: Typo fix in watermark hint. --- javascript/hints.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/hints.js b/javascript/hints.js index 045f2d3c..b81c181b 100644 --- a/javascript/hints.js +++ b/javascript/hints.js @@ -80,7 +80,7 @@ titles = { "Scale latent": "Uscale the image in latent space. Alternative is to produce the full image from latent representation, upscale that, and then move it back to latent space.", "Eta noise seed delta": "If this values is non-zero, it will be added to seed and used to initialize RNG for noises when using samplers with Eta. You can use this to produce even more variation of images, or you can use this to match images of other software if you know what you are doing.", - "Do not add watermark to images": "If this option is enabled, watermark will not be added to created images. Warning: if you do not add watermark, you may be bevaing in an unethical manner.", + "Do not add watermark to images": "If this option is enabled, watermark will not be added to created images. 
Warning: if you do not add watermark, you may be behaving in an unethical manner.", } -- cgit v1.2.1 From 2d006ce16cd95d587533656c3ac4991495e96f23 Mon Sep 17 00:00:00 2001 From: Milly Date: Mon, 10 Oct 2022 00:56:36 +0900 Subject: xy_grid: Find hypernetwork by closest name --- modules/hypernetworks/hypernetwork.py | 11 +++++++++++ scripts/xy_grid.py | 6 +++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 470659df..8f2192e2 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -120,6 +120,17 @@ def load_hypernetwork(filename): shared.loaded_hypernetwork = None +def find_closest_hypernetwork_name(search: str): + if not search: + return None + search = search.lower() + applicable = [name for name in shared.hypernetworks if search in name.lower()] + if not applicable: + return None + applicable = sorted(applicable, key=lambda name: len(name)) + return applicable[0] + + def apply_hypernetwork(hypernetwork, context, layer=None): hypernetwork_layers = (hypernetwork.layers if hypernetwork is not None else {}).get(context.shape[2], None) diff --git a/scripts/xy_grid.py b/scripts/xy_grid.py index ef431105..6f4217ec 100644 --- a/scripts/xy_grid.py +++ b/scripts/xy_grid.py @@ -84,7 +84,11 @@ def apply_checkpoint(p, x, xs): def apply_hypernetwork(p, x, xs): - hypernetwork.load_hypernetwork(x) + if x.lower() in ["", "none"]: + name = None + else: + name = hypernetwork.find_closest_hypernetwork_name(x) + hypernetwork.load_hypernetwork(name) def apply_clip_skip(p, x, xs): -- cgit v1.2.1
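A standalone sketch of the closest-match rule the commit above introduces: case-insensitive substring matching, preferring the shortest matching name (the hypernetwork names are illustrative):

names = ["anime", "anime_3", "portraits-v2"]

def find_closest(search, names):
    search = search.lower()
    hits = [n for n in names if search in n.lower()]
    # shortest hit wins, mirroring sorted(applicable, key=len)[0]
    return min(hits, key=len) if hits else None

assert find_closest("Anime", names) == "anime"
assert find_closest("portrait", names) == "portraits-v2"
assert find_closest("missing", names) is None

From 7dba1c07cb337114507d9c256f9b843162c187d6 Mon Sep 17 00:00:00 2001 From: Milly Date: Mon, 10 Oct 2022 01:37:09 +0900 Subject: xy_grid: Confirm that hypernetwork options are valid before starting --- scripts/xy_grid.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/xy_grid.py b/scripts/xy_grid.py index 6f4217ec..b2239d0a 100644 --- a/scripts/xy_grid.py +++ b/scripts/xy_grid.py @@ -88,9 +88,19 @@ def apply_hypernetwork(p, x, xs): name = None else: name = hypernetwork.find_closest_hypernetwork_name(x) + if not name: + raise RuntimeError(f"Unknown hypernetwork: {x}") hypernetwork.load_hypernetwork(name) +def confirm_hypernetworks(xs): + for x in xs: + if x.lower() in ["", "none"]: + continue + if not hypernetwork.find_closest_hypernetwork_name(x): + raise RuntimeError(f"Unknown hypernetwork: {x}") + + def apply_clip_skip(p, x, xs): opts.data["CLIP_stop_at_last_layers"] = x -- cgit v1.2.1 From 2fffd4bddce12b2c98a5bae5a2cc6d64450d65a0 Mon Sep 17 00:00:00 2001 From: Milly Date: Mon, 10 Oct 2022 02:20:35 +0900 Subject: xy_grid: Refactor confirm functions --- scripts/xy_grid.py | 73 +++++++++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/scripts/xy_grid.py b/scripts/xy_grid.py index b2239d0a..3bb080bf 100644 --- a/scripts/xy_grid.py +++ b/scripts/xy_grid.py @@ -77,12 +77,26 @@ def apply_sampler(p, x, xs): p.sampler_index = sampler_index +def confirm_samplers(p, xs): + samplers_dict = build_samplers_dict(p) + for x in xs: + if x.lower() not in samplers_dict.keys(): + raise RuntimeError(f"Unknown sampler: {x}") + + def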
apply_checkpoint(p, x, xs): info = modules.sd_models.get_closet_checkpoint_match(x) - assert info is not None, f'Checkpoint for {x} not found' + if info is None: + raise RuntimeError(f"Unknown checkpoint: {x}") modules.sd_models.reload_model_weights(shared.sd_model, info) +def confirm_checkpoints(p, xs): + for x in xs: + if modules.sd_models.get_closet_checkpoint_match(x) is None: + raise RuntimeError(f"Unknown checkpoint: {x}") + + def apply_hypernetwork(p, x, xs): if x.lower() in ["", "none"]: name = None @@ -93,7 +107,7 @@ def apply_hypernetwork(p, x, xs): hypernetwork.load_hypernetwork(name) -def confirm_hypernetworks(xs): +def confirm_hypernetworks(p, xs): for x in xs: if x.lower() in ["", "none"]: continue @@ -135,29 +149,29 @@ def str_permutations(x): return x -AxisOption = namedtuple("AxisOption", ["label", "type", "apply", "format_value"]) -AxisOptionImg2Img = namedtuple("AxisOptionImg2Img", ["label", "type", "apply", "format_value"]) +AxisOption = namedtuple("AxisOption", ["label", "type", "apply", "format_value", "confirm"]) +AxisOptionImg2Img = namedtuple("AxisOptionImg2Img", ["label", "type", "apply", "format_value", "confirm"]) axis_options = [ - AxisOption("Nothing", str, do_nothing, format_nothing), - AxisOption("Seed", int, apply_field("seed"), format_value_add_label), - AxisOption("Var. seed", int, apply_field("subseed"), format_value_add_label), - AxisOption("Var. strength", float, apply_field("subseed_strength"), format_value_add_label), - AxisOption("Steps", int, apply_field("steps"), format_value_add_label), - AxisOption("CFG Scale", float, apply_field("cfg_scale"), format_value_add_label), - AxisOption("Prompt S/R", str, apply_prompt, format_value), - AxisOption("Prompt order", str_permutations, apply_order, format_value_join_list), - AxisOption("Sampler", str, apply_sampler, format_value), - AxisOption("Checkpoint name", str, apply_checkpoint, format_value), - AxisOption("Hypernetwork", str, apply_hypernetwork, format_value), - AxisOption("Sigma Churn", float, apply_field("s_churn"), format_value_add_label), - AxisOption("Sigma min", float, apply_field("s_tmin"), format_value_add_label), - AxisOption("Sigma max", float, apply_field("s_tmax"), format_value_add_label), - AxisOption("Sigma noise", float, apply_field("s_noise"), format_value_add_label), - AxisOption("Eta", float, apply_field("eta"), format_value_add_label), - AxisOption("Clip skip", int, apply_clip_skip, format_value_add_label), - AxisOptionImg2Img("Denoising", float, apply_field("denoising_strength"), format_value_add_label), # as it is now all AxisOptionImg2Img items must go after AxisOption ones + AxisOption("Nothing", str, do_nothing, format_nothing, None), + AxisOption("Seed", int, apply_field("seed"), format_value_add_label, None), + AxisOption("Var. seed", int, apply_field("subseed"), format_value_add_label, None), + AxisOption("Var. 
strength", float, apply_field("subseed_strength"), format_value_add_label, None), + AxisOption("Steps", int, apply_field("steps"), format_value_add_label, None), + AxisOption("CFG Scale", float, apply_field("cfg_scale"), format_value_add_label, None), + AxisOption("Prompt S/R", str, apply_prompt, format_value, None), + AxisOption("Prompt order", str_permutations, apply_order, format_value_join_list, None), + AxisOption("Sampler", str, apply_sampler, format_value, confirm_samplers), + AxisOption("Checkpoint name", str, apply_checkpoint, format_value, confirm_checkpoints), + AxisOption("Hypernetwork", str, apply_hypernetwork, format_value, confirm_hypernetworks), + AxisOption("Sigma Churn", float, apply_field("s_churn"), format_value_add_label, None), + AxisOption("Sigma min", float, apply_field("s_tmin"), format_value_add_label, None), + AxisOption("Sigma max", float, apply_field("s_tmax"), format_value_add_label, None), + AxisOption("Sigma noise", float, apply_field("s_noise"), format_value_add_label, None), + AxisOption("Eta", float, apply_field("eta"), format_value_add_label, None), + AxisOption("Clip skip", int, apply_clip_skip, format_value_add_label, None), + AxisOptionImg2Img("Denoising", float, apply_field("denoising_strength"), format_value_add_label, None), # as it is now all AxisOptionImg2Img items must go after AxisOption ones ] @@ -283,19 +297,10 @@ class Script(scripts.Script): valslist = list(permutations(valslist)) valslist = [opt.type(x) for x in valslist] - + # Confirm options are valid before starting - if opt.label == "Sampler": - samplers_dict = build_samplers_dict(p) - for sampler_val in valslist: - if sampler_val.lower() not in samplers_dict.keys(): - raise RuntimeError(f"Unknown sampler: {sampler_val}") - elif opt.label == "Checkpoint name": - for ckpt_val in valslist: - if modules.sd_models.get_closet_checkpoint_match(ckpt_val) is None: - raise RuntimeError(f"Checkpoint for {ckpt_val} not found") - elif opt.label == "Hypernetwork": - confirm_hypernetworks(valslist) + if opt.confirm: + opt.confirm(p, valslist) return valslist -- cgit v1.2.1 From ee015a1af66a94a75c914659fa0d321e702a0a87 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 11:05:57 +0300 Subject: change textual inversion tab to train remake train interface to use tabs --- modules/hypernetworks/hypernetwork.py | 2 +- modules/ui.py | 22 +++++++++------------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 8f2192e2..8314450a 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -175,7 +175,7 @@ def attention_CrossAttention_forward(self, x, context=None, mask=None): def train_hypernetwork(hypernetwork_name, learn_rate, data_root, log_directory, steps, create_image_every, save_hypernetwork_every, template_file, preview_image_prompt): - assert hypernetwork_name, 'embedding not selected' + assert hypernetwork_name, 'hypernetwork not selected' path = shared.hypernetworks.get(hypernetwork_name, None) shared.loaded_hypernetwork = Hypernetwork() diff --git a/modules/ui.py b/modules/ui.py index 4bfdd275..86a2da6c 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1035,14 +1035,14 @@ def create_ui(wrap_gradio_gpu_call): sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings() - with gr.Blocks() as textual_inversion_interface: + with gr.Blocks() as train_interface: with gr.Row().style(equal_height=False): - with gr.Column(): - 
with gr.Group(): - gr.HTML(value="See wiki for detailed explanation.") + gr.HTML(value="See wiki for detailed explanation.") - gr.HTML(value="Create a new embedding
") + with gr.Row().style(equal_height=False): + with gr.Tabs(elem_id="train_tabs"): + with gr.Tab(label="Create embedding"): new_embedding_name = gr.Textbox(label="Name") initialization_text = gr.Textbox(label="Initialization text", value="*") nvpt = gr.Slider(label="Number of vectors per token", minimum=1, maximum=75, step=1, value=1) @@ -1054,9 +1054,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Column(): create_embedding = gr.Button(value="Create embedding", variant='primary') - with gr.Group(): - gr.HTML(value="
Create a new hypernetwork
") - + with gr.Tab(label="Create hypernetwork"): new_hypernetwork_name = gr.Textbox(label="Name") new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "320", "640", "1280"]) @@ -1067,9 +1065,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Column(): create_hypernetwork = gr.Button(value="Create hypernetwork", variant='primary') - with gr.Group(): - gr.HTML(value="
Preprocess images
") - + with gr.Tab(label="Preprocess images"): process_src = gr.Textbox(label='Source directory') process_dst = gr.Textbox(label='Destination directory') process_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) @@ -1091,7 +1087,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Column(): run_preprocess = gr.Button(value="Preprocess", variant='primary') - with gr.Group(): + with gr.Tab(label="Train"): gr.HTML(value="
Train an embedding; must specify a directory with a set of 1:1 ratio images
") train_embedding_name = gr.Dropdown(label='Embedding', choices=sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())) train_hypernetwork_name = gr.Dropdown(label='Hypernetwork', choices=[x for x in shared.hypernetworks.keys()]) @@ -1388,7 +1384,7 @@ Requested path was: {f} (extras_interface, "Extras", "extras"), (pnginfo_interface, "PNG Info", "pnginfo"), (modelmerger_interface, "Checkpoint Merger", "modelmerger"), - (textual_inversion_interface, "Textual inversion", "ti"), + (train_interface, "Train", "ti"), (settings_interface, "Settings", "settings"), ] -- cgit v1.2.1 From 80f3cf2bb2ce3f00d801cae2c3a8c20a8d4167d8 Mon Sep 17 00:00:00 2001 From: hentailord85ez <112723046+hentailord85ez@users.noreply.github.com> Date: Tue, 11 Oct 2022 19:48:53 +0100 Subject: Account when lines are mismatched --- modules/sd_hijack.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index ac70f876..2753d4fa 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -321,7 +321,17 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): fixes.append(fix[1]) self.hijack.fixes.append(fixes) - z1 = self.process_tokens([x[:75] for x in remade_batch_tokens], [x[:75] for x in batch_multipliers]) + tokens = [] + multipliers = [] + for i in range(len(remade_batch_tokens)): + if len(remade_batch_tokens[i]) > 0: + tokens.append(remade_batch_tokens[i][:75]) + multipliers.append(batch_multipliers[i][:75]) + else: + tokens.append([self.wrapped.tokenizer.eos_token_id] * 75) + multipliers.append([1.0] * 75) + + z1 = self.process_tokens(tokens, multipliers) z = z1 if z is None else torch.cat((z, z1), axis=-2) remade_batch_tokens = rem_tokens -- cgit v1.2.1 From 8561d5762b98bf7cfb764128ebf11633d8bb4405 Mon Sep 17 00:00:00 2001 From: Kalle Date: Wed, 12 Oct 2022 12:43:11 +0300 Subject: Remove duplicate artist from file --- artists.csv | 1 - 1 file changed, 1 deletion(-) diff --git a/artists.csv b/artists.csv index 14ba2022..99cdbdc6 100644 --- a/artists.csv +++ b/artists.csv @@ -1045,7 +1045,6 @@ Bakemono Zukushi,0.67051035,anime Lucy Madox Brown,0.67032814,fineart Paul Wonner,0.6700563,scribbles Guido Borelli Da Caluso,0.66966087,digipa-high-impact -Guido Borelli da Caluso,0.66966087,digipa-high-impact Emil Alzamora,0.5844039,nudity Heinrich Brocksieper,0.64469147,fineart Dan Smith,0.669563,digipa-high-impact -- cgit v1.2.1 From 429442f4a6aab7301efb89d27bef524fe827e81a Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 13:38:03 +0300 Subject: fix iterator bug for #2295 --- modules/sd_hijack.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index 2753d4fa..c81722a0 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -323,10 +323,10 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): tokens = [] multipliers = [] - for i in range(len(remade_batch_tokens)): - if len(remade_batch_tokens[i]) > 0: - tokens.append(remade_batch_tokens[i][:75]) - multipliers.append(batch_multipliers[i][:75]) + for j in range(len(remade_batch_tokens)): + if len(remade_batch_tokens[j]) > 0: + tokens.append(remade_batch_tokens[j][:75]) + multipliers.append(batch_multipliers[j][:75]) else: tokens.append([self.wrapped.tokenizer.eos_token_id] * 75) multipliers.append([1.0] * 75) -- cgit v1.2.1 From 50be33e953be93c40814262c6dbce36e66004528 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Wed, 12 Oct 
2022 13:13:25 +0100 Subject: formatting --- modules/textual_inversion/image_embedding.py | 170 ++++++++++++++------------- 1 file changed, 91 insertions(+), 79 deletions(-) diff --git a/modules/textual_inversion/image_embedding.py b/modules/textual_inversion/image_embedding.py index 1224fb42..898ce3b3 100644 --- a/modules/textual_inversion/image_embedding.py +++ b/modules/textual_inversion/image_embedding.py @@ -2,122 +2,134 @@ import base64 import json import numpy as np import zlib -from PIL import Image,PngImagePlugin,ImageDraw,ImageFont +from PIL import Image, PngImagePlugin, ImageDraw, ImageFont from fonts.ttf import Roboto import torch + class EmbeddingEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, torch.Tensor): - return {'TORCHTENSOR':obj.cpu().detach().numpy().tolist()} + return {'TORCHTENSOR': obj.cpu().detach().numpy().tolist()} return json.JSONEncoder.default(self, obj) + class EmbeddingDecoder(json.JSONDecoder): def __init__(self, *args, **kwargs): json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) + def object_hook(self, d): if 'TORCHTENSOR' in d: return torch.from_numpy(np.array(d['TORCHTENSOR'])) return d + def embedding_to_b64(data): - d = json.dumps(data,cls=EmbeddingEncoder) + d = json.dumps(data, cls=EmbeddingEncoder) return base64.b64encode(d.encode()) + def embedding_from_b64(data): d = base64.b64decode(data) - return json.loads(d,cls=EmbeddingDecoder) + return json.loads(d, cls=EmbeddingDecoder) + def lcg(m=2**32, a=1664525, c=1013904223, seed=0): while True: seed = (a * seed + c) % m - yield seed%255 + yield seed % 255 + def xor_block(block): g = lcg() randblock = np.array([next(g) for _ in range(np.product(block.shape))]).astype(np.uint8).reshape(block.shape) - return np.bitwise_xor(block.astype(np.uint8),randblock & 0x0F) + return np.bitwise_xor(block.astype(np.uint8), randblock & 0x0F) -def style_block(block,sequence): - im = Image.new('RGB',(block.shape[1],block.shape[0])) + +def style_block(block, sequence): + im = Image.new('RGB', (block.shape[1], block.shape[0])) draw = ImageDraw.Draw(im) - i=0 - for x in range(-6,im.size[0],8): - for yi,y in enumerate(range(-6,im.size[1],8)): - offset=0 - if yi%2==0: - offset=4 - shade = sequence[i%len(sequence)] - i+=1 - draw.ellipse((x+offset, y, x+6+offset, y+6), fill =(shade,shade,shade) ) + i = 0 + for x in range(-6, im.size[0], 8): + for yi, y in enumerate(range(-6, im.size[1], 8)): + offset = 0 + if yi % 2 == 0: + offset = 4 + shade = sequence[i % len(sequence)] + i += 1 + draw.ellipse((x+offset, y, x+6+offset, y+6), fill=(shade, shade, shade)) fg = np.array(im).astype(np.uint8) & 0xF0 return block ^ fg -def insert_image_data_embed(image,data): + +def insert_image_data_embed(image, data): d = 3 - data_compressed = zlib.compress( json.dumps(data,cls=EmbeddingEncoder).encode(),level=9) - data_np_ = np.frombuffer(data_compressed,np.uint8).copy() + data_compressed = zlib.compress(json.dumps(data, cls=EmbeddingEncoder).encode(), level=9) + data_np_ = np.frombuffer(data_compressed, np.uint8).copy() data_np_high = data_np_ >> 4 - data_np_low = data_np_ & 0x0F - + data_np_low = data_np_ & 0x0F + h = image.size[1] - next_size = data_np_low.shape[0] + (h-(data_np_low.shape[0]%h)) - next_size = next_size + ((h*d)-(next_size%(h*d))) + next_size = data_np_low.shape[0] + (h-(data_np_low.shape[0] % h)) + next_size = next_size + ((h*d)-(next_size % (h*d))) data_np_low.resize(next_size) - data_np_low = data_np_low.reshape((h,-1,d)) + data_np_low = data_np_low.reshape((h, -1, d)) 
data_np_high.resize(next_size) - data_np_high = data_np_high.reshape((h,-1,d)) + data_np_high = data_np_high.reshape((h, -1, d)) edge_style = list(data['string_to_param'].values())[0].cpu().detach().numpy().tolist()[0][:1024] edge_style = (np.abs(edge_style)/np.max(np.abs(edge_style))*255).astype(np.uint8) - data_np_low = style_block(data_np_low,sequence=edge_style) - data_np_low = xor_block(data_np_low) - data_np_high = style_block(data_np_high,sequence=edge_style[::-1]) - data_np_high = xor_block(data_np_high) + data_np_low = style_block(data_np_low, sequence=edge_style) + data_np_low = xor_block(data_np_low) + data_np_high = style_block(data_np_high, sequence=edge_style[::-1]) + data_np_high = xor_block(data_np_high) - im_low = Image.fromarray(data_np_low,mode='RGB') - im_high = Image.fromarray(data_np_high,mode='RGB') + im_low = Image.fromarray(data_np_low, mode='RGB') + im_high = Image.fromarray(data_np_high, mode='RGB') - background = Image.new('RGB',(image.size[0]+im_low.size[0]+im_high.size[0]+2,image.size[1]),(0,0,0)) - background.paste(im_low,(0,0)) - background.paste(image,(im_low.size[0]+1,0)) - background.paste(im_high,(im_low.size[0]+1+image.size[0]+1,0)) + background = Image.new('RGB', (image.size[0]+im_low.size[0]+im_high.size[0]+2, image.size[1]), (0, 0, 0)) + background.paste(im_low, (0, 0)) + background.paste(image, (im_low.size[0]+1, 0)) + background.paste(im_high, (im_low.size[0]+1+image.size[0]+1, 0)) return background -def crop_black(img,tol=0): - mask = (img>tol).all(2) - mask0,mask1 = mask.any(0),mask.any(1) - col_start,col_end = mask0.argmax(),mask.shape[1]-mask0[::-1].argmax() - row_start,row_end = mask1.argmax(),mask.shape[0]-mask1[::-1].argmax() - return img[row_start:row_end,col_start:col_end] + +def crop_black(img, tol=0): + mask = (img > tol).all(2) + mask0, mask1 = mask.any(0), mask.any(1) + col_start, col_end = mask0.argmax(), mask.shape[1]-mask0[::-1].argmax() + row_start, row_end = mask1.argmax(), mask.shape[0]-mask1[::-1].argmax() + return img[row_start:row_end, col_start:col_end] + def extract_image_data_embed(image): - d=3 - outarr = crop_black(np.array(image.convert('RGB').getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) & 0x0F - black_cols = np.where( np.sum(outarr, axis=(0,2))==0) + d = 3 + outarr = crop_black(np.array(image.convert('RGB').getdata()).reshape(image.size[1], image.size[0], d).astype(np.uint8)) & 0x0F + black_cols = np.where(np.sum(outarr, axis=(0, 2)) == 0) if black_cols[0].shape[0] < 2: print('No Image data blocks found.') return None - data_block_lower = outarr[:,:black_cols[0].min(),:].astype(np.uint8) - data_block_upper = outarr[:,black_cols[0].max()+1:,:].astype(np.uint8) + data_block_lower = outarr[:, :black_cols[0].min(), :].astype(np.uint8) + data_block_upper = outarr[:, black_cols[0].max()+1:, :].astype(np.uint8) data_block_lower = xor_block(data_block_lower) data_block_upper = xor_block(data_block_upper) - + data_block = (data_block_upper << 4) | (data_block_lower) data_block = data_block.flatten().tobytes() data = zlib.decompress(data_block) - return json.loads(data,cls=EmbeddingDecoder) + return json.loads(data, cls=EmbeddingDecoder) + -def caption_image_overlay(srcimage,title,footerLeft,footerMid,footerRight,textfont=None): +def caption_image_overlay(srcimage, title, footerLeft, footerMid, footerRight, textfont=None): from math import cos image = srcimage.copy() @@ -130,11 +142,11 @@ def caption_image_overlay(srcimage,title,footerLeft,footerMid,footerRight,textfo textfont = Roboto factor = 1.5 - 
gradient = Image.new('RGBA', (1,image.size[1]), color=(0,0,0,0)) + gradient = Image.new('RGBA', (1, image.size[1]), color=(0, 0, 0, 0)) for y in range(image.size[1]): mag = 1-cos(y/image.size[1]*factor) - mag = max(mag,1-cos((image.size[1]-y)/image.size[1]*factor*1.1)) - gradient.putpixel((0, y), (0,0,0,int(mag*255))) + mag = max(mag, 1-cos((image.size[1]-y)/image.size[1]*factor*1.1)) + gradient.putpixel((0, y), (0, 0, 0, int(mag*255))) image = Image.alpha_composite(image.convert('RGBA'), gradient.resize(image.size)) draw = ImageDraw.Draw(image) @@ -142,41 +154,41 @@ def caption_image_overlay(srcimage,title,footerLeft,footerMid,footerRight,textfo font = ImageFont.truetype(textfont, fontsize) padding = 10 - _,_,w, h = draw.textbbox((0,0),title,font=font) - fontsize = min( int(fontsize * (((image.size[0]*0.75)-(padding*4))/w) ), 72) + _, _, w, h = draw.textbbox((0, 0), title, font=font) + fontsize = min(int(fontsize * (((image.size[0]*0.75)-(padding*4))/w)), 72) font = ImageFont.truetype(textfont, fontsize) - _,_,w,h = draw.textbbox((0,0),title,font=font) - draw.text((padding,padding), title, anchor='lt', font=font, fill=(255,255,255,230)) + _, _, w, h = draw.textbbox((0, 0), title, font=font) + draw.text((padding, padding), title, anchor='lt', font=font, fill=(255, 255, 255, 230)) - _,_,w, h = draw.textbbox((0,0),footerLeft,font=font) - fontsize_left = min( int(fontsize * (((image.size[0]/3)-(padding))/w) ), 72) - _,_,w, h = draw.textbbox((0,0),footerMid,font=font) - fontsize_mid = min( int(fontsize * (((image.size[0]/3)-(padding))/w) ), 72) - _,_,w, h = draw.textbbox((0,0),footerRight,font=font) - fontsize_right = min( int(fontsize * (((image.size[0]/3)-(padding))/w) ), 72) + _, _, w, h = draw.textbbox((0, 0), footerLeft, font=font) + fontsize_left = min(int(fontsize * (((image.size[0]/3)-(padding))/w)), 72) + _, _, w, h = draw.textbbox((0, 0), footerMid, font=font) + fontsize_mid = min(int(fontsize * (((image.size[0]/3)-(padding))/w)), 72) + _, _, w, h = draw.textbbox((0, 0), footerRight, font=font) + fontsize_right = min(int(fontsize * (((image.size[0]/3)-(padding))/w)), 72) - font = ImageFont.truetype(textfont, min(fontsize_left,fontsize_mid,fontsize_right)) + font = ImageFont.truetype(textfont, min(fontsize_left, fontsize_mid, fontsize_right)) - draw.text((padding,image.size[1]-padding), footerLeft, anchor='ls', font=font, fill=(255,255,255,230)) - draw.text((image.size[0]/2,image.size[1]-padding), footerMid, anchor='ms', font=font, fill=(255,255,255,230)) - draw.text((image.size[0]-padding,image.size[1]-padding), footerRight, anchor='rs', font=font, fill=(255,255,255,230)) + draw.text((padding, image.size[1]-padding), footerLeft, anchor='ls', font=font, fill=(255, 255, 255, 230)) + draw.text((image.size[0]/2, image.size[1]-padding), footerMid, anchor='ms', font=font, fill=(255, 255, 255, 230)) + draw.text((image.size[0]-padding, image.size[1]-padding), footerRight, anchor='rs', font=font, fill=(255, 255, 255, 230)) return image + if __name__ == '__main__': testEmbed = Image.open('test_embedding.png') - data = extract_image_data_embed(testEmbed) assert data is not None data = embedding_from_b64(testEmbed.text['sd-ti-embedding']) assert data is not None - - image = Image.new('RGBA',(512,512),(255,255,200,255)) + + image = Image.new('RGBA', (512, 512), (255, 255, 200, 255)) cap_image = caption_image_overlay(image, 'title', 'footerLeft', 'footerMid', 'footerRight') - test_embed = {'string_to_param':{'*':torch.from_numpy(np.random.random((2, 4096)))}} + test_embed = {'string_to_param': 
{'*': torch.from_numpy(np.random.random((2, 4096)))}} embedded_image = insert_image_data_embed(cap_image, test_embed) @@ -191,16 +203,16 @@ if __name__ == '__main__': g = lcg() shared_random = np.array([next(g) for _ in range(100)]).astype(np.uint8).tolist() - reference_random = [253, 242, 127, 44, 157, 27, 239, 133, 38, 79, 167, 4, 177, - 95, 130, 79, 78, 14, 52, 215, 220, 194, 126, 28, 240, 179, - 160, 153, 149, 50, 105, 14, 21, 218, 199, 18, 54, 198, 193, - 38, 128, 19, 53, 195, 124, 75, 205, 12, 6, 145, 0, 28, - 30, 148, 8, 45, 218, 171, 55, 249, 97, 166, 12, 35, 0, - 41, 221, 122, 215, 170, 31, 113, 186, 97, 119, 31, 23, 185, - 66, 140, 30, 41, 37, 63, 137, 109, 216, 55, 159, 145, 82, + reference_random = [253, 242, 127, 44, 157, 27, 239, 133, 38, 79, 167, 4, 177, + 95, 130, 79, 78, 14, 52, 215, 220, 194, 126, 28, 240, 179, + 160, 153, 149, 50, 105, 14, 21, 218, 199, 18, 54, 198, 193, + 38, 128, 19, 53, 195, 124, 75, 205, 12, 6, 145, 0, 28, + 30, 148, 8, 45, 218, 171, 55, 249, 97, 166, 12, 35, 0, + 41, 221, 122, 215, 170, 31, 113, 186, 97, 119, 31, 23, 185, + 66, 140, 30, 41, 37, 63, 137, 109, 216, 55, 159, 145, 82, 204, 86, 73, 222, 44, 198, 118, 240, 97] - assert shared_random == reference_random + assert shared_random == reference_random hunna_kay_random_sum = sum(np.array([next(g) for _ in range(100000)]).astype(np.uint8).tolist()) -- cgit v1.2.1 From 10a2de644f8ea4cfade88e85d768da3480f4c9f0 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Wed, 12 Oct 2022 13:15:35 +0100 Subject: formatting --- modules/textual_inversion/textual_inversion.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 485ef46c..b072d745 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -7,14 +7,14 @@ import tqdm import html import datetime -from PIL import Image,PngImagePlugin +from PIL import Image, PngImagePlugin from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset -from modules.textual_inversion.image_embedding import (embedding_to_b64,embedding_from_b64, - insert_image_data_embed,extract_image_data_embed, - caption_image_overlay ) +from modules.textual_inversion.image_embedding import (embedding_to_b64, embedding_from_b64, + insert_image_data_embed, extract_image_data_embed, + caption_image_overlay) class Embedding: def __init__(self, vec, name, step=None): @@ -90,10 +90,10 @@ class EmbeddingDatabase: embed_image = Image.open(path) if 'sd-ti-embedding' in embed_image.text: data = embedding_from_b64(embed_image.text['sd-ti-embedding']) - name = data.get('name',name) + name = data.get('name', name) else: data = extract_image_data_embed(embed_image) - name = data.get('name',name) + name = data.get('name', name) else: data = torch.load(path, map_location="cpu") @@ -278,24 +278,24 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini shared.state.current_image = image if save_image_with_stored_embedding and os.path.exists(last_saved_file): - + last_saved_image_chunks = os.path.join(images_embeds_dir, f'{embedding_name}-{embedding.step}.png') info = PngImagePlugin.PngInfo() data = torch.load(last_saved_file) info.add_text("sd-ti-embedding", embedding_to_b64(data)) - title = "<{}>".format(data.get('name','???')) + title = "<{}>".format(data.get('name', '???')) checkpoint = 
sd_models.select_checkpoint() footer_left = checkpoint.model_name footer_mid = '[{}]'.format(checkpoint.hash) footer_right = '{}'.format(embedding.step) - captioned_image = caption_image_overlay(image,title,footer_left,footer_mid,footer_right) - captioned_image = insert_image_data_embed(captioned_image,data) + captioned_image = caption_image_overlay(image, title, footer_left, footer_mid, footer_right) + captioned_image = insert_image_data_embed(captioned_image, data) captioned_image.save(last_saved_image_chunks, "PNG", pnginfo=info) - + image.save(last_saved_image) last_saved_image += f", prompt: {preview_text}" -- cgit v1.2.1 From c3c8eef9fd5a0c8b26319e32ca4a19b56204e6df Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 20:49:47 +0300 Subject: train: change filename processing to be more simple and configurable train: make it possible to make text files with prompts train: rework scheduler so that there's less repeating code in textual inversion and hypernets train: move epochs setting to options --- javascript/hints.js | 3 ++ modules/hypernetworks/hypernetwork.py | 40 +++++++++------------- modules/shared.py | 3 ++ modules/textual_inversion/dataset.py | 47 +++++++++++++++++++------- modules/textual_inversion/learn_schedule.py | 37 +++++++++++++++++++- modules/textual_inversion/textual_inversion.py | 35 +++++++------------ modules/ui.py | 2 -- 7 files changed, 105 insertions(+), 62 deletions(-) diff --git a/javascript/hints.js b/javascript/hints.js index b81c181b..d51ee14c 100644 --- a/javascript/hints.js +++ b/javascript/hints.js @@ -81,6 +81,9 @@ titles = { "Eta noise seed delta": "If this values is non-zero, it will be added to seed and used to initialize RNG for noises when using samplers with Eta. You can use this to produce even more variation of images, or you can use this to match images of other software if you know what you are doing.", "Do not add watermark to images": "If this option is enabled, watermark will not be added to created images. Warning: if you do not add watermark, you may be behaving in an unethical manner.", + + "Filename word regex": "This regular expression will be used to extract words from filename, and they will be joined using the option below into label text used for training. 
Leave empty to keep filename text as it is.", + "Filename join string": "This string will be used to join split words into a single line if the option above is enabled.", } diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 8314450a..b6c06d49 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -14,7 +14,7 @@ import torch from torch import einsum from einops import rearrange, repeat import modules.textual_inversion.dataset -from modules.textual_inversion.learn_schedule import LearnSchedule +from modules.textual_inversion.learn_schedule import LearnRateScheduler class HypernetworkModule(torch.nn.Module): @@ -223,31 +223,23 @@ def train_hypernetwork(hypernetwork_name, learn_rate, data_root, log_directory, if ititial_step > steps: return hypernetwork, filename - schedules = iter(LearnSchedule(learn_rate, steps, ititial_step)) - (learn_rate, end_step) = next(schedules) - print(f'Training at rate of {learn_rate} until step {end_step}') - - optimizer = torch.optim.AdamW(weights, lr=learn_rate) + scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) + optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) - for i, (x, text, cond) in pbar: + for i, entry in pbar: hypernetwork.step = i + ititial_step - if hypernetwork.step > end_step: - try: - (learn_rate, end_step) = next(schedules) - except Exception: - break - tqdm.tqdm.write(f'Training at rate of {learn_rate} until step {end_step}') - for pg in optimizer.param_groups: - pg['lr'] = learn_rate + scheduler.apply(optimizer, hypernetwork.step) + if scheduler.finished: + break if shared.state.interrupted: break with torch.autocast("cuda"): - cond = cond.to(devices.device) - x = x.to(devices.device) + cond = entry.cond.to(devices.device) + x = entry.latent.to(devices.device) loss = shared.sd_model(x.unsqueeze(0), cond)[0] del x del cond @@ -267,7 +259,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, data_root, log_directory, if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0: last_saved_image = os.path.join(images_dir, f'{hypernetwork_name}-{hypernetwork.step}.png') - preview_text = text if preview_image_prompt == "" else preview_image_prompt + preview_text = entry.cond_text if preview_image_prompt == "" else preview_image_prompt optimizer.zero_grad() shared.sd_model.cond_stage_model.to(devices.device) @@ -282,16 +274,16 @@ def train_hypernetwork(hypernetwork_name, learn_rate, data_root, log_directory, ) processed = processing.process_images(p) - image = processed.images[0] + image = processed.images[0] if len(processed.images)>0 else None if unload: shared.sd_model.cond_stage_model.to(devices.cpu) shared.sd_model.first_stage_model.to(devices.cpu) - shared.state.current_image = image - image.save(last_saved_image) - - last_saved_image += f", prompt: {preview_text}" + if image is not None: + shared.state.current_image = image + image.save(last_saved_image) + last_saved_image += f", prompt: {preview_text}" shared.state.job_no = hypernetwork.step 
Loss: {losses.mean():.7f}
Step: {hypernetwork.step}
-Last prompt: {html.escape(text)}
+Last prompt: {html.escape(entry.cond_text)}
Last saved embedding: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}
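For illustration, the loop shape this hunk moves to can be reproduced in a few lines. The following is a minimal, runnable sketch under stated assumptions: ToyScheduler is a hypothetical stand-in for LearnRateScheduler (whose real implementation appears in learn_schedule.py later in this commit), holding (rate, end_step) pairs and advancing them through a single apply() call per step.

import torch

class ToyScheduler:
    # Stand-in for LearnRateScheduler: advance through (rate, end_step) pairs.
    def __init__(self, pairs):
        self.pairs = list(pairs)                  # e.g. [(0.001, 2), (1e-5, 5)]
        self.learn_rate, self.end_step = self.pairs.pop(0)
        self.finished = False

    def apply(self, optimizer, step_number):
        if step_number <= self.end_step:
            return                                # current rate still valid
        if not self.pairs:
            self.finished = True                  # schedule exhausted
            return
        self.learn_rate, self.end_step = self.pairs.pop(0)
        for pg in optimizer.param_groups:
            pg['lr'] = self.learn_rate            # same in-place update as the diff

weights = [torch.nn.Parameter(torch.zeros(4))]
scheduler = ToyScheduler([(0.001, 2), (1e-5, 5)])
optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate)

for step in range(10):
    scheduler.apply(optimizer, step)
    if scheduler.finished:
        break                                     # mirrors the early exit above
    print(step, optimizer.param_groups[0]['lr'])

Centralizing the rate changes behind apply() is what lets the hypernetwork loop here and the textual inversion loop below share one scheduler instead of each repeating the next(schedules) bookkeeping.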
diff --git a/modules/shared.py b/modules/shared.py index 42e99741..e64e69fc 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -231,6 +231,9 @@ options_templates.update(options_section(('system', "System"), { options_templates.update(options_section(('training', "Training"), { "unload_models_when_training": OptionInfo(False, "Unload VAE and CLIP from VRAM when training"), + "dataset_filename_word_regex": OptionInfo("", "Filename word regex"), + "dataset_filename_join_string": OptionInfo(" ", "Filename join string"), + "training_image_repeats_per_epoch": OptionInfo(100, "Number of repeats for a single input image per epoch; used only for displaying epoch number", gr.Number, {"precision": 0}), })) options_templates.update(options_section(('sd', "Stable Diffusion"), { diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index f61f40d3..67e90afe 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -11,11 +11,21 @@ import tqdm from modules import devices, shared import re -re_tag = re.compile(r"[a-zA-Z][_\w\d()]+") +re_numbers_at_start = re.compile(r"^[-\d]+\s*") + + +class DatasetEntry: + def __init__(self, filename=None, latent=None, filename_text=None): + self.filename = filename + self.latent = latent + self.filename_text = filename_text + self.cond = None + self.cond_text = None class PersonalizedBase(Dataset): def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None, include_cond=False): + re_word = re.compile(shared.opts.dataset_filename_word_regex) if len(shared.opts.dataset_filename_word_regex)>0 else None self.placeholder_token = placeholder_token @@ -42,9 +52,18 @@ class PersonalizedBase(Dataset): except Exception: continue + text_filename = os.path.splitext(path)[0] + ".txt" filename = os.path.basename(path) - filename_tokens = os.path.splitext(filename)[0] - filename_tokens = re_tag.findall(filename_tokens) + + if os.path.exists(text_filename): + with open(text_filename, "r", encoding="utf8") as file: + filename_text = file.read() + else: + filename_text = os.path.splitext(filename)[0] + filename_text = re.sub(re_numbers_at_start, '', filename_text) + if re_word: + tokens = re_word.findall(filename_text) + filename_text = (shared.opts.dataset_filename_join_string or "").join(tokens) npimage = np.array(image).astype(np.uint8) npimage = (npimage / 127.5 - 1.0).astype(np.float32) @@ -55,13 +74,13 @@ class PersonalizedBase(Dataset): init_latent = model.get_first_stage_encoding(model.encode_first_stage(torchdata.unsqueeze(dim=0))).squeeze() init_latent = init_latent.to(devices.cpu) + entry = DatasetEntry(filename=path, filename_text=filename_text, latent=init_latent) + if include_cond: - text = self.create_text(filename_tokens) - cond = cond_model([text]).to(devices.cpu) - else: - cond = None + entry.cond_text = self.create_text(filename_text) + entry.cond = cond_model([entry.cond_text]).to(devices.cpu) - self.dataset.append((init_latent, filename_tokens, cond)) + self.dataset.append(entry) self.length = len(self.dataset) * repeats @@ -72,10 +91,10 @@ class PersonalizedBase(Dataset): def shuffle(self): self.indexes = self.initial_indexes[torch.randperm(self.initial_indexes.shape[0])] - def create_text(self, filename_tokens): + def create_text(self, filename_text): text = random.choice(self.lines) text = text.replace("[name]", self.placeholder_token) - text = text.replace("[filewords]", ' '.join(filename_tokens)) + text = 
text.replace("[filewords]", filename_text) return text def __len__(self): @@ -86,7 +105,9 @@ class PersonalizedBase(Dataset): self.shuffle() index = self.indexes[i % len(self.indexes)] - x, filename_tokens, cond = self.dataset[index] + entry = self.dataset[index] + + if entry.cond is None: + entry.cond_text = self.create_text(entry.filename_text) - text = self.create_text(filename_tokens) - return x, text, cond + return entry diff --git a/modules/textual_inversion/learn_schedule.py b/modules/textual_inversion/learn_schedule.py index db720271..2062726a 100644 --- a/modules/textual_inversion/learn_schedule.py +++ b/modules/textual_inversion/learn_schedule.py @@ -1,6 +1,12 @@ +import tqdm -class LearnSchedule: + +class LearnScheduleIterator: def __init__(self, learn_rate, max_steps, cur_step=0): + """ + specify learn_rate as "0.001:100, 0.00001:1000, 1e-5:10000" to have lr of 0.001 until step 100, 0.00001 until 1000, 1e-5:10000 until 10000 + """ + pairs = learn_rate.split(',') self.rates = [] self.it = 0 @@ -32,3 +38,32 @@ class LearnSchedule: return self.rates[self.it - 1] else: raise StopIteration + + +class LearnRateScheduler: + def __init__(self, learn_rate, max_steps, cur_step=0, verbose=True): + self.schedules = LearnScheduleIterator(learn_rate, max_steps, cur_step) + (self.learn_rate, self.end_step) = next(self.schedules) + self.verbose = verbose + + if self.verbose: + print(f'Training at rate of {self.learn_rate} until step {self.end_step}') + + self.finished = False + + def apply(self, optimizer, step_number): + if step_number <= self.end_step: + return + + try: + (self.learn_rate, self.end_step) = next(self.schedules) + except Exception: + self.finished = True + return + + if self.verbose: + tqdm.tqdm.write(f'Training at rate of {self.learn_rate} until step {self.end_step}') + + for pg in optimizer.param_groups: + pg['lr'] = self.learn_rate + diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index c5153e4a..fa0e33a2 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -11,7 +11,7 @@ from PIL import Image, PngImagePlugin from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset -from modules.textual_inversion.learn_schedule import LearnSchedule +from modules.textual_inversion.learn_schedule import LearnRateScheduler from modules.textual_inversion.image_embedding import (embedding_to_b64, embedding_from_b64, insert_image_data_embed, extract_image_data_embed, @@ -172,8 +172,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): return fn - -def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, num_repeats, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_image_prompt): +def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_image_prompt): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." @@ -205,7 +204,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." 
with torch.autocast("cuda"): - ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=num_repeats, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) hijack = sd_hijack.model_hijack @@ -221,32 +220,24 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini if ititial_step > steps: return embedding, filename - schedules = iter(LearnSchedule(learn_rate, steps, ititial_step)) - (learn_rate, end_step) = next(schedules) - print(f'Training at rate of {learn_rate} until step {end_step}') - - optimizer = torch.optim.AdamW([embedding.vec], lr=learn_rate) + scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) + optimizer = torch.optim.AdamW([embedding.vec], lr=scheduler.learn_rate) pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step) - for i, (x, text, _) in pbar: + for i, entry in pbar: embedding.step = i + ititial_step - if embedding.step > end_step: - try: - (learn_rate, end_step) = next(schedules) - except: - break - tqdm.tqdm.write(f'Training at rate of {learn_rate} until step {end_step}') - for pg in optimizer.param_groups: - pg['lr'] = learn_rate + scheduler.apply(optimizer, embedding.step) + if scheduler.finished: + break if shared.state.interrupted: break with torch.autocast("cuda"): - c = cond_model([text]) + c = cond_model([entry.cond_text]) - x = x.to(devices.device) + x = entry.latent.to(devices.device) loss = shared.sd_model(x.unsqueeze(0), c)[0] del x @@ -268,7 +259,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini if embedding.step > 0 and images_dir is not None and embedding.step % create_image_every == 0: last_saved_image = os.path.join(images_dir, f'{embedding_name}-{embedding.step}.png') - preview_text = text if preview_image_prompt == "" else preview_image_prompt + preview_text = entry.cond_text if preview_image_prompt == "" else preview_image_prompt p = processing.StableDiffusionProcessingTxt2Img( sd_model=shared.sd_model, @@ -314,7 +305,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini
Loss: {losses.mean():.7f}
Step: {embedding.step}
-Last prompt: {html.escape(text)}
+Last prompt: {html.escape(entry.cond_text)}
Last saved embedding: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}
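With the dataset.py changes earlier in this commit, a training label now comes from a companion .txt file when one exists; otherwise it is derived from the image filename: leading index digits are stripped, then the new "Filename word regex" and "Filename join string" options are applied. A small sketch of that filename fallback, assuming illustrative option values (label_from_filename is a made-up helper, not part of the module):

import re

filename_word_regex = r"[a-zA-Z]+"   # assumed value for the new regex option
filename_join_string = " "           # the default join string

re_numbers_at_start = re.compile(r"^[-\d]+\s*")  # same pattern as dataset.py

def label_from_filename(filename):
    text = filename.rsplit('.', 1)[0]          # drop the extension
    text = re_numbers_at_start.sub('', text)   # strip "00042-1-" style prefixes
    words = re.findall(filename_word_regex, text)
    return filename_join_string.join(words)

print(label_from_filename("00042-1-a_photo_of_a_cat.png"))  # a photo of a cat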
diff --git a/modules/ui.py b/modules/ui.py index 2b332267..c42535c8 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1098,7 +1098,6 @@ def create_ui(wrap_gradio_gpu_call): training_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) training_height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) steps = gr.Number(label='Max steps', value=100000, precision=0) - num_repeats = gr.Number(label='Number of repeats for a single input image per epoch', value=100, precision=0) create_image_every = gr.Number(label='Save an image to log directory every N steps, 0 to disable', value=500, precision=0) save_embedding_every = gr.Number(label='Save a copy of embedding to log directory every N steps, 0 to disable', value=500, precision=0) save_image_with_stored_embedding = gr.Checkbox(label='Save images with embedding in PNG chunks', value=True) @@ -1176,7 +1175,6 @@ def create_ui(wrap_gradio_gpu_call): training_width, training_height, steps, - num_repeats, create_image_every, save_embedding_every, template_file, -- cgit v1.2.1 From 698d303b04e293635bfb49c525409f3bcf671dce Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 21:55:43 +0300 Subject: deepbooru: added option to use spaces or underscores deepbooru: added option to quote (\) in tags deepbooru/BLIP: write caption to file instead of image filename deepbooru/BLIP: now possible to use both for captions deepbooru: process is stopped even if an exception occurs --- modules/deepbooru.py | 65 ++++++++++++++++++----- modules/shared.py | 2 + modules/textual_inversion/preprocess.py | 92 ++++++++++++++------------------- modules/ui.py | 7 +-- 4 files changed, 95 insertions(+), 71 deletions(-) diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 29529949..419e6a9c 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -2,33 +2,44 @@ import os.path from concurrent.futures import ProcessPoolExecutor import multiprocessing import time +import re + +re_special = re.compile(r'([\\()])') def get_deepbooru_tags(pil_image): """ This method is for running only one image at a time for simple use. Used to the img2img interrogate. 
""" from modules import shared # prevents circular reference - create_deepbooru_process(shared.opts.interrogate_deepbooru_score_threshold, shared.opts.deepbooru_sort_alpha) - shared.deepbooru_process_return["value"] = -1 - shared.deepbooru_process_queue.put(pil_image) - while shared.deepbooru_process_return["value"] == -1: - time.sleep(0.2) - tags = shared.deepbooru_process_return["value"] - release_process() - return tags + try: + create_deepbooru_process(shared.opts.interrogate_deepbooru_score_threshold, create_deepbooru_opts()) + return get_tags_from_process(pil_image) + finally: + release_process() + + +def create_deepbooru_opts(): + from modules import shared -def deepbooru_process(queue, deepbooru_process_return, threshold, alpha_sort): + return { + "use_spaces": shared.opts.deepbooru_use_spaces, + "use_escape": shared.opts.deepbooru_escape, + "alpha_sort": shared.opts.deepbooru_sort_alpha, + } + + +def deepbooru_process(queue, deepbooru_process_return, threshold, deepbooru_opts): model, tags = get_deepbooru_tags_model() while True: # while process is running, keep monitoring queue for new image pil_image = queue.get() if pil_image == "QUIT": break else: - deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold, alpha_sort) + deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts) -def create_deepbooru_process(threshold, alpha_sort): +def create_deepbooru_process(threshold, deepbooru_opts): """ Creates deepbooru process. A queue is created to send images into the process. This enables multiple images to be processed in a row without reloading the model or creating a new process. To return the data, a shared @@ -41,10 +52,23 @@ def create_deepbooru_process(threshold, alpha_sort): shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue() shared.deepbooru_process_return = shared.deepbooru_process_manager.dict() shared.deepbooru_process_return["value"] = -1 - shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold, alpha_sort)) + shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold, deepbooru_opts)) shared.deepbooru_process.start() +def get_tags_from_process(image): + from modules import shared + + shared.deepbooru_process_return["value"] = -1 + shared.deepbooru_process_queue.put(image) + while shared.deepbooru_process_return["value"] == -1: + time.sleep(0.2) + caption = shared.deepbooru_process_return["value"] + shared.deepbooru_process_return["value"] = -1 + + return caption + + def release_process(): """ Stops the deepbooru process to return used memory @@ -81,10 +105,15 @@ def get_deepbooru_tags_model(): return model, tags -def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, alpha_sort): +def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts): import deepdanbooru as dd import tensorflow as tf import numpy as np + + alpha_sort = deepbooru_opts['alpha_sort'] + use_spaces = deepbooru_opts['use_spaces'] + use_escape = deepbooru_opts['use_escape'] + width = model.input_shape[2] height = model.input_shape[1] image = np.array(pil_image) @@ -129,4 +158,12 @@ def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, alpha_sort) print('\n'.join(sorted(result_tags_print, reverse=True))) - return ', 
'.join(result_tags_out).replace('_', ' ').replace(':', ' ') + tags_text = ', '.join(result_tags_out) + + if use_spaces: + tags_text = tags_text.replace('_', ' ') + + if use_escape: + tags_text = re.sub(re_special, r'\\\1', tags_text) + + return tags_text.replace(':', ' ') diff --git a/modules/shared.py b/modules/shared.py index e64e69fc..78b73aae 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -260,6 +260,8 @@ options_templates.update(options_section(('interrogate', "Interrogate Options"), "interrogate_clip_max_length": OptionInfo(48, "Interrogate: maximum description length", gr.Slider, {"minimum": 1, "maximum": 256, "step": 1}), "interrogate_deepbooru_score_threshold": OptionInfo(0.5, "Interrogate: deepbooru score threshold", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}), "deepbooru_sort_alpha": OptionInfo(True, "Interrogate: deepbooru sort alphabetically"), + "deepbooru_use_spaces": OptionInfo(False, "use spaces for tags in deepbooru"), + "deepbooru_escape": OptionInfo(True, "escape (\\) brackets in deepbooru (so they are used as literal brackets and not for emphasis)"), })) options_templates.update(options_section(('ui', "User interface"), { diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 113cecf1..3047bede 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -10,7 +10,28 @@ from modules.shared import opts, cmd_opts if cmd_opts.deepdanbooru: import modules.deepbooru as deepbooru + def preprocess(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False): + try: + if process_caption: + shared.interrogator.load() + + if process_caption_deepbooru: + deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, deepbooru.create_deepbooru_opts()) + + preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru) + + finally: + + if process_caption: + shared.interrogator.send_blip_to_ram() + + if process_caption_deepbooru: + deepbooru.release_process() + + + +def preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False): width = process_width height = process_height src = os.path.abspath(process_src) @@ -25,30 +46,28 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ shared.state.textinfo = "Preprocessing..." 
shared.state.job_count = len(files) - if process_caption: - shared.interrogator.load() - - if process_caption_deepbooru: - deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, opts.deepbooru_sort_alpha) - def save_pic_with_caption(image, index): + caption = "" + if process_caption: - caption = "-" + shared.interrogator.generate_caption(image) - caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png") - elif process_caption_deepbooru: - shared.deepbooru_process_return["value"] = -1 - shared.deepbooru_process_queue.put(image) - while shared.deepbooru_process_return["value"] == -1: - time.sleep(0.2) - caption = "-" + shared.deepbooru_process_return["value"] - caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png") - shared.deepbooru_process_return["value"] = -1 - else: - caption = filename - caption = os.path.splitext(caption)[0] - caption = os.path.basename(caption) + caption += shared.interrogator.generate_caption(image) + + if process_caption_deepbooru: + if len(caption) > 0: + caption += ", " + caption += deepbooru.get_tags_from_process(image) + + filename_part = filename + filename_part = os.path.splitext(filename_part)[0] + filename_part = os.path.basename(filename_part) + + basename = f"{index:05}-{subindex[0]}-{filename_part}" + image.save(os.path.join(dst, f"{basename}.png")) + + if len(caption) > 0: + with open(os.path.join(dst, f"{basename}.txt"), "w", encoding="utf8") as file: + file.write(caption) - image.save(os.path.join(dst, f"{index:05}-{subindex[0]}{caption}.png")) subindex[0] += 1 def save_pic(image, index): @@ -93,34 +112,3 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ save_pic(img, index) shared.state.nextjob() - - if process_caption: - shared.interrogator.send_blip_to_ram() - - if process_caption_deepbooru: - deepbooru.release_process() - - -def sanitize_caption(base_path, original_caption, suffix): - operating_system = platform.system().lower() - if (operating_system == "windows"): - invalid_path_characters = "\\/:*?\"<>|" - max_path_length = 259 - else: - invalid_path_characters = "/" #linux/macos - max_path_length = 1023 - caption = original_caption - for invalid_character in invalid_path_characters: - caption = caption.replace(invalid_character, "") - fixed_path_length = len(base_path) + len(suffix) - if fixed_path_length + len(caption) <= max_path_length: - return caption - caption_tokens = caption.split() - new_caption = "" - for token in caption_tokens: - last_caption = new_caption - new_caption = new_caption + token + " " - if (len(new_caption) + fixed_path_length - 1 > max_path_length): - break - print(f"\nPath will be too long. 
Truncated caption: {original_caption}\nto: {last_caption}", file=sys.stderr) - return last_caption.strip() diff --git a/modules/ui.py b/modules/ui.py index c42535c8..e07ee0e1 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1074,11 +1074,8 @@ def create_ui(wrap_gradio_gpu_call): with gr.Row(): process_flip = gr.Checkbox(label='Create flipped copies') process_split = gr.Checkbox(label='Split oversized images into two') - process_caption = gr.Checkbox(label='Use BLIP caption as filename') - if cmd_opts.deepdanbooru: - process_caption_deepbooru = gr.Checkbox(label='Use deepbooru caption as filename') - else: - process_caption_deepbooru = gr.Checkbox(label='Use deepbooru caption as filename', visible=False) + process_caption = gr.Checkbox(label='Use BLIP for caption') + process_caption_deepbooru = gr.Checkbox(label='Use deepbooru for caption', visible=True if cmd_opts.deepdanbooru else False) with gr.Row(): with gr.Column(scale=3): -- cgit v1.2.1
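The two deepbooru options added in this commit only affect how the final tag list is rendered. A compact sketch of that post-processing, mirroring the tail of get_deepbooru_tags_from_model above (format_tags itself is an illustrative wrapper, not a function the module defines):

import re

re_special = re.compile(r'([\\()])')  # same pattern the commit adds

def format_tags(result_tags_out, use_spaces=False, use_escape=True):
    tags_text = ', '.join(result_tags_out)
    if use_spaces:
        tags_text = tags_text.replace('_', ' ')             # deepbooru_use_spaces
    if use_escape:
        tags_text = re.sub(re_special, r'\\\1', tags_text)  # deepbooru_escape
    return tags_text.replace(':', ' ')

print(format_tags(["1girl", "smile_(happy)"], use_spaces=True))
# prints: 1girl, smile \(happy\)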