forked from FlintyLemming/ComfyUI-AnyText
281 lines
13 KiB
Python
281 lines
13 KiB
Python
|
import os
|
|||
|
import folder_paths
|
|||
|
import re
|
|||
|
import cv2
|
|||
|
import numpy as np
|
|||
|
from .utils import is_module_imported, pil2tensor, get_device_by_name, comfy_tensor_Image2np_Image
|
|||
|
|
|||
|
comfy_temp_dir = folder_paths.get_temp_directory()
|
|||
|
Random_Gen_Mask_path = os.path.join(comfy_temp_dir, "AnyText_random_mask_pos_img.png")
|
|||
|
tmp_pose_img_path = os.path.join(comfy_temp_dir, "AnyText_manual_mask_pos_img.png")
|
|||
|
tmp_ori_img_path = os.path.join(comfy_temp_dir, "AnyText_ori_img.png")
|
|||
|
|
|||
|
class AnyText:
|
|||
|
|
|||
|
def __init__(self):
|
|||
|
self.model = None
|
|||
|
|
|||
|
@classmethod
|
|||
|
def INPUT_TYPES(cls):
|
|||
|
return {
|
|||
|
"required": {
|
|||
|
"AnyText_Loader": ("AnyText_Loader", {"forceInput": True}),
|
|||
|
"prompt": ("STRING", {"default": "A raccoon stands in front of the blackboard with the words \"你好呀~Hello!\" written on it.", "multiline": True}),
|
|||
|
"a_prompt": ("STRING", {"default": "best quality, extremely detailed,4k, HD, supper legible text, clear text edges, clear strokes, neat writing, no watermarks", "multiline": True}),
|
|||
|
"n_prompt": ("STRING", {"default": "low-res, bad anatomy, extra digit, fewer digits, cropped, worst quality, low quality, watermark, unreadable text, messy words, distorted text, disorganized writing, advertising picture", "multiline": True}),
|
|||
|
"mode": (['text-generation', 'text-editing'],{"default": 'text-generation'}),
|
|||
|
"sort_radio": (["↕", "↔"],{"default": "↔"}),
|
|||
|
"revise_pos": ("BOOLEAN", {"default": False}),
|
|||
|
"img_count": ("INT", {"default": 1, "min": 1, "max": 10}),
|
|||
|
"ddim_steps": ("INT", {"default": 20, "min": 2, "max": 100}),
|
|||
|
"seed": ("INT", {"default": 9999, "min": -1, "max": 99999999}),
|
|||
|
"nonEdit_random_gen_width": ("INT", {"default": 512, "min": 128, "max": 1920, "step": 64}),
|
|||
|
"nonEdit_random_gen_height": ("INT", {"default": 512, "min": 128, "max": 1920, "step": 64}),
|
|||
|
# "width": ("INT", {"forceInput": True}),
|
|||
|
# "height": ("INT", {"forceInput": True}),
|
|||
|
"Random_Gen": ("BOOLEAN", {"default": False}),
|
|||
|
"strength": ("FLOAT", {
|
|||
|
"default": 1.00,
|
|||
|
"min": -999999,
|
|||
|
"max": 9999999,
|
|||
|
"step": 0.01
|
|||
|
}),
|
|||
|
"cfg_scale": ("FLOAT", {
|
|||
|
"default": 9,
|
|||
|
"min": 1,
|
|||
|
"max": 99,
|
|||
|
"step": 0.1
|
|||
|
}),
|
|||
|
"eta": ("FLOAT", {
|
|||
|
"default": 0,
|
|||
|
"min": 0,
|
|||
|
"max": 1,
|
|||
|
"step": 0.1
|
|||
|
}),
|
|||
|
"device": (["auto", "cuda", "cpu", "mps", "xpu"],{"default": "auto"}),
|
|||
|
"fp16": ("BOOLEAN", {"default": True}),
|
|||
|
"cpu_offload": ("BOOLEAN", {"default": False, "label_on": "model_to_cpu", "label_off": "unload_model"}),
|
|||
|
"all_to_device": ("BOOLEAN", {"default": False}),
|
|||
|
},
|
|||
|
"optional": {
|
|||
|
"ori_image": ("IMAGE", {"forceInput": True}),
|
|||
|
"pos_image": ("IMAGE", {"forceInput": True}),
|
|||
|
# "show_debug": ("BOOLEAN", {"default": False}),
|
|||
|
},
|
|||
|
}
|
|||
|
|
|||
|
RETURN_TYPES = ("IMAGE",)
|
|||
|
CATEGORY = "ExtraModels/AnyText"
|
|||
|
FUNCTION = "anytext_process"
|
|||
|
TITLE = "AnyText Geneation"
|
|||
|
|
|||
|
def anytext_process(self,
|
|||
|
mode,
|
|||
|
AnyText_Loader,
|
|||
|
ori_image,
|
|||
|
pos_image,
|
|||
|
sort_radio,
|
|||
|
revise_pos,
|
|||
|
Random_Gen,
|
|||
|
prompt,
|
|||
|
cpu_offload,
|
|||
|
# show_debug,
|
|||
|
img_count,
|
|||
|
fp16,
|
|||
|
device,
|
|||
|
all_to_device,
|
|||
|
ddim_steps=20,
|
|||
|
strength=1,
|
|||
|
cfg_scale=9,
|
|||
|
seed="",
|
|||
|
eta=0.0,
|
|||
|
a_prompt="",
|
|||
|
n_prompt="",
|
|||
|
nonEdit_random_gen_width=512,
|
|||
|
nonEdit_random_gen_height=512,
|
|||
|
):
|
|||
|
def prompt_replace(prompt):
|
|||
|
prompt = prompt.replace('“', '"')
|
|||
|
prompt = prompt.replace('”', '"')
|
|||
|
p = '"(.*?)"'
|
|||
|
strs = re.findall(p, prompt)
|
|||
|
if len(strs) == 0:
|
|||
|
strs = [' ']
|
|||
|
else:
|
|||
|
for s in strs:
|
|||
|
prompt = prompt.replace(f'"{s}"', f' * ', 1)
|
|||
|
return prompt
|
|||
|
|
|||
|
def check_overlap_polygon(rect_pts1, rect_pts2):
|
|||
|
poly1 = cv2.convexHull(rect_pts1)
|
|||
|
poly2 = cv2.convexHull(rect_pts2)
|
|||
|
rect1 = cv2.boundingRect(poly1)
|
|||
|
rect2 = cv2.boundingRect(poly2)
|
|||
|
if rect1[0] + rect1[2] >= rect2[0] and rect2[0] + rect2[2] >= rect1[0] and rect1[1] + rect1[3] >= rect2[1] and rect2[1] + rect2[3] >= rect1[1]:
|
|||
|
return True
|
|||
|
return False
|
|||
|
|
|||
|
def count_lines(prompt):
|
|||
|
prompt = prompt.replace('“', '"')
|
|||
|
prompt = prompt.replace('”', '"')
|
|||
|
p = '"(.*?)"'
|
|||
|
strs = re.findall(p, prompt)
|
|||
|
if len(strs) == 0:
|
|||
|
strs = [' ']
|
|||
|
return len(strs)
|
|||
|
|
|||
|
def generate_rectangles(w, h, n, max_trys=200):
|
|||
|
img = np.zeros((h, w, 1), dtype=np.uint8)
|
|||
|
rectangles = []
|
|||
|
attempts = 0
|
|||
|
n_pass = 0
|
|||
|
low_edge = int(max(w, h)*0.3 if n <= 3 else max(w, h)*0.2) # ~150, ~100
|
|||
|
while attempts < max_trys:
|
|||
|
rect_w = min(np.random.randint(max((w*0.5)//n, low_edge), w), int(w*0.8))
|
|||
|
ratio = np.random.uniform(4, 10)
|
|||
|
rect_h = max(low_edge, int(rect_w/ratio))
|
|||
|
rect_h = min(rect_h, int(h*0.8))
|
|||
|
# gen rotate angle
|
|||
|
rotation_angle = 0
|
|||
|
rand_value = np.random.rand()
|
|||
|
if rand_value < 0.7:
|
|||
|
pass
|
|||
|
elif rand_value < 0.8:
|
|||
|
rotation_angle = np.random.randint(0, 40)
|
|||
|
elif rand_value < 0.9:
|
|||
|
rotation_angle = np.random.randint(140, 180)
|
|||
|
else:
|
|||
|
rotation_angle = np.random.randint(85, 95)
|
|||
|
# rand position
|
|||
|
x = np.random.randint(0, w - rect_w)
|
|||
|
y = np.random.randint(0, h - rect_h)
|
|||
|
# get vertex
|
|||
|
rect_pts = cv2.boxPoints(((rect_w/2, rect_h/2), (rect_w, rect_h), rotation_angle))
|
|||
|
rect_pts = np.int32(rect_pts)
|
|||
|
# move
|
|||
|
rect_pts += (x, y)
|
|||
|
# check boarder
|
|||
|
if np.any(rect_pts < 0) or np.any(rect_pts[:, 0] >= w) or np.any(rect_pts[:, 1] >= h):
|
|||
|
attempts += 1
|
|||
|
continue
|
|||
|
# check overlap
|
|||
|
if any(check_overlap_polygon(rect_pts, rp) for rp in rectangles): # type: ignore
|
|||
|
attempts += 1
|
|||
|
continue
|
|||
|
n_pass += 1
|
|||
|
img = cv2.fillPoly(img, [rect_pts], 255)
|
|||
|
cv2.imwrite(Random_Gen_Mask_path, 255-img[..., ::-1])
|
|||
|
rectangles.append(rect_pts)
|
|||
|
if n_pass == n:
|
|||
|
break
|
|||
|
print("attempts:", attempts)
|
|||
|
if len(rectangles) != n:
|
|||
|
raise Exception(f'Failed in auto generate positions after {attempts} attempts, try again!')
|
|||
|
return img
|
|||
|
|
|||
|
if not is_module_imported('AnyText_Pipeline'):
|
|||
|
from .AnyText_scripts.AnyText_pipeline import AnyText_Pipeline
|
|||
|
|
|||
|
#check if prompt is chinese to decide whether to load translator,检测是否为中文提示词,否则不适用翻译。
|
|||
|
prompt_modify = prompt_replace(prompt)
|
|||
|
bool_is_chinese = AnyText_Pipeline.is_chinese(self, prompt_modify)
|
|||
|
|
|||
|
device = get_device_by_name(device)
|
|||
|
loader_out = AnyText_Loader.split("|")
|
|||
|
|
|||
|
if bool_is_chinese == False:
|
|||
|
use_translator = False
|
|||
|
else:
|
|||
|
use_translator = True
|
|||
|
if 'damo/nlp_csanmt_translation_zh2en' in loader_out[3]:
|
|||
|
if not os.access(os.path.join(folder_paths.models_dir, "prompt_generator", "nlp_csanmt_translation_zh2en", "tf_ckpts", "ckpt-0.data-00000-of-00001"), os.F_OK):
|
|||
|
if not is_module_imported('snapshot_download'):
|
|||
|
from modelscope.hub.snapshot_download import snapshot_download
|
|||
|
snapshot_download('damo/nlp_csanmt_translation_zh2en')
|
|||
|
else:
|
|||
|
if not os.access(os.path.join(folder_paths.models_dir, "prompt_generator", "models--utrobinmv--t5_translate_en_ru_zh_small_1024", "model.safetensors"), os.F_OK):
|
|||
|
if not is_module_imported('hg_snapshot_download'):
|
|||
|
from huggingface_hub import snapshot_download as hg_snapshot_download
|
|||
|
hg_snapshot_download(repo_id="utrobinmv/t5_translate_en_ru_zh_small_1024")
|
|||
|
|
|||
|
pipe = AnyText_Pipeline(ckpt_path=loader_out[1], clip_path=loader_out[2], translator_path=loader_out[3], cfg_path=loader_out[4], use_translator=use_translator, device=device, use_fp16=fp16, all_to_device=all_to_device, loaded_model_tensor=self.model)
|
|||
|
|
|||
|
# tensor图片转换为numpy图片
|
|||
|
pos_image = comfy_tensor_Image2np_Image(self, pos_image)
|
|||
|
ori_image = comfy_tensor_Image2np_Image(self, ori_image)
|
|||
|
# 保存转换后的numpy图片到ComfyUI临时文件夹
|
|||
|
pos_image.save(tmp_pose_img_path)
|
|||
|
ori_image.save(tmp_ori_img_path)
|
|||
|
|
|||
|
ori = tmp_ori_img_path
|
|||
|
pos = tmp_pose_img_path
|
|||
|
|
|||
|
if mode == "text-generation":
|
|||
|
ori_image = None
|
|||
|
revise_pos = revise_pos
|
|||
|
else:
|
|||
|
revise_pos = False
|
|||
|
ori_image = ori
|
|||
|
|
|||
|
n_lines = count_lines(prompt)
|
|||
|
if Random_Gen == True:
|
|||
|
generate_rectangles(nonEdit_random_gen_width, nonEdit_random_gen_height, n_lines, max_trys=500)
|
|||
|
pos_img = Random_Gen_Mask_path
|
|||
|
else:
|
|||
|
pos_img = pos
|
|||
|
|
|||
|
# lora_path = r"D:\AI\ComfyUI_windows_portable\ComfyUI\models\loras\ys艺术\sd15_mw_bpch_扁平风格插画v1d1.safetensors"
|
|||
|
# lora_ratio = 1
|
|||
|
# lora_path_ratio = str(lora_path)+ " " + str(lora_ratio)
|
|||
|
# print("\033[93m", lora_path_ratio, "\033[0m")
|
|||
|
|
|||
|
params = {
|
|||
|
"mode": mode,
|
|||
|
"use_fp16": fp16,
|
|||
|
"Random_Gen": Random_Gen,
|
|||
|
"sort_priority": sort_radio,
|
|||
|
"revise_pos": revise_pos,
|
|||
|
# "show_debug": show_debug,
|
|||
|
"image_count": img_count,
|
|||
|
"ddim_steps": ddim_steps - 1,
|
|||
|
"image_width": nonEdit_random_gen_width,
|
|||
|
"image_height": nonEdit_random_gen_height,
|
|||
|
"strength": strength,
|
|||
|
"cfg_scale": cfg_scale,
|
|||
|
"eta": eta,
|
|||
|
"a_prompt": a_prompt,
|
|||
|
"n_prompt": n_prompt,
|
|||
|
# "lora_path_ratio": lora_path_ratio,
|
|||
|
}
|
|||
|
input_data = {
|
|||
|
"prompt": prompt,
|
|||
|
"seed": seed,
|
|||
|
"draw_pos": pos_img,
|
|||
|
"ori_image": ori_image,
|
|||
|
}
|
|||
|
# if show_debug ==True:
|
|||
|
# print(f'\033[93mloader from .util(从.util输入的loader): {AnyText_Loader}, \033[0m\n \
|
|||
|
# \033[93mloader_out split form loader(分割loader得到4个参数): {loader_out}, \033[0m\n \
|
|||
|
# \033[93mFont(字体)--loader_out[0]: {loader_out[0]}, \033[0m\n \
|
|||
|
# \033[93mAnyText Model(AnyText模型)--loader_out[1]: {loader_out[1]}, \033[0m\n \
|
|||
|
# \033[93mclip model(clip模型)--loader_out[2]: {loader_out[2]}, \033[0m\n \
|
|||
|
# \033[93mTranslator(翻译模型)--loader_out[3]: {loader_out[3]}, \033[0m\n \
|
|||
|
# \033[93myaml_file(yaml配置文件): {loader_out[4]}, \033[0m\n) \
|
|||
|
# \033[93mIs Chinese Input(是否中文输入): {use_translator}, \033[0m\n \
|
|||
|
# \033[93mNumber of text-content to generate(需要生成的文本数量): {n_lines}, \033[0m\n \
|
|||
|
# \033[93mpos_image location(遮罩图位置): {pos}, \033[0m\n \
|
|||
|
# \033[93mori_image location(原图位置): {ori}, \033[0m\n \
|
|||
|
# \033[93mSort Position(文本生成位置排序): {sort_radio}, \033[0m\n \
|
|||
|
# \033[93mEnable revise_pos(启用位置修正): {revise_pos}, \033[0m')
|
|||
|
x_samples, results, rtn_code, rtn_warning, debug_info, self.model = pipe(input_data, font_path=loader_out[0], cpu_offload=cpu_offload, **params)
|
|||
|
if rtn_code < 0:
|
|||
|
raise Exception(f"Error in AnyText pipeline: {rtn_warning}")
|
|||
|
output = pil2tensor(x_samples)
|
|||
|
print("\n", debug_info)
|
|||
|
return(output)
|
|||
|
|
|||
|
# Node class and display name mappings
|
|||
|
NODE_CLASS_MAPPINGS = {
|
|||
|
"AnyText": AnyText,
|
|||
|
}
|