Spaces:
Runtime error
Runtime error
Alexander Bagus
commited on
Commit
·
bb2d84c
1
Parent(s):
506ccf8
22
Browse files- .gitignore +16 -0
- README.md +6 -7
- app.py +228 -4
- examples/0_examples.json +7 -0
- examples/bird.jpg +0 -0
- examples/bottle.jpg +0 -0
- examples/pose1.jpg +0 -0
- examples/pose2.jpg +0 -0
- examples/room.jpg +0 -0
- requirements.txt +6 -0
- static/footer.md +13 -0
- static/header.html +14 -0
- utils/image_utils.py +76 -0
- utils/prompt_utils.py +71 -0
- utils/repo_utils.py +33 -0
.gitignore
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# project
|
| 2 |
+
/models/
|
| 3 |
+
|
| 4 |
+
# Packages
|
| 5 |
+
*.egg
|
| 6 |
+
*.egg-info
|
| 7 |
+
build
|
| 8 |
+
eggs
|
| 9 |
+
parts
|
| 10 |
+
bin
|
| 11 |
+
var
|
| 12 |
+
sdist
|
| 13 |
+
develop-eggs
|
| 14 |
+
.installed.cfg
|
| 15 |
+
lib64
|
| 16 |
+
__pycache__
|
README.md
CHANGED
|
@@ -1,14 +1,13 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 6.
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
-
|
| 11 |
-
short_description: Train LoRA from a single image
|
| 12 |
---
|
| 13 |
|
| 14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: STARFlow Image
|
| 3 |
+
emoji: 💻
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: red
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 6.0.2
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
short_description: Generate image from text prompt using Apple STARFlow
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
|
@@ -1,7 +1,231 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
def greet(name):
|
| 4 |
-
return "Hello " + name + "!!"
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import numpy as np
|
| 3 |
+
import torch, random, json, spaces, time
|
| 4 |
+
# from safetensors.torch import load_file
|
| 5 |
+
# from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
|
| 6 |
+
# from videox_fun.pipeline import ZImageControlPipeline
|
| 7 |
+
# from videox_fun.models import ZImageControlTransformer2DModel
|
| 8 |
+
# from transformers import AutoTokenizer, Qwen3ForCausalLM
|
| 9 |
+
# from diffusers import AutoencoderKL
|
| 10 |
+
# from controlnet_aux.processor import Processor
|
| 11 |
+
from utils import repo_utils, image_utils, prompt_utils
|
| 12 |
|
|
|
|
|
|
|
| 13 |
|
| 14 |
+
repo_utils.clone_repo_if_not_exists("https://github.com/apple/ml-starflow.git", "app/models")
|
| 15 |
+
repo_utils.clone_repo_if_not_exists("https://huggingface.co/apple/starflow", "app/models")
|
| 16 |
+
|
| 17 |
+
# MODEL_PATH = "models/Z-Image-Turbo/"
|
| 18 |
+
# CONTROLNET_PATH = "models/Z-Image-Turbo-Fun-Controlnet-Union/Z-Image-Turbo-Fun-Controlnet-Union.safetensors"
|
| 19 |
+
|
| 20 |
+
DTYPE = torch.bfloat16
|
| 21 |
+
MAX_SEED = np.iinfo(np.int32).max
|
| 22 |
+
|
| 23 |
+
# # load transformer
|
| 24 |
+
# transformer = ZImageControlTransformer2DModel.from_pretrained(
|
| 25 |
+
# MODEL_PATH,
|
| 26 |
+
# subfolder="transformer",
|
| 27 |
+
# transformer_additional_kwargs={
|
| 28 |
+
# "control_layers_places": [0, 5, 10, 15, 20, 25],
|
| 29 |
+
# "control_in_dim": 16
|
| 30 |
+
# },
|
| 31 |
+
# torch_dtype= DTYPE
|
| 32 |
+
# ).to("cuda")
|
| 33 |
+
|
| 34 |
+
# ## Load controlnet
|
| 35 |
+
# state_dict = load_file(CONTROLNET_PATH)
|
| 36 |
+
# state_dict = state_dict["state_dict"] if "state_dict" in state_dict else state_dict
|
| 37 |
+
# m, u = transformer.load_state_dict(state_dict, strict=False)
|
| 38 |
+
# print(f"missing keys: {len(m)}, unexpected keys: {len(u)}")
|
| 39 |
+
|
| 40 |
+
# # load ZImageControlPipeline
|
| 41 |
+
# vae = AutoencoderKL.from_pretrained(
|
| 42 |
+
# MODEL_PATH,
|
| 43 |
+
# subfolder="vae",
|
| 44 |
+
# device_map="cuda",
|
| 45 |
+
# torch_dtype= DTYPE
|
| 46 |
+
# )
|
| 47 |
+
|
| 48 |
+
# tokenizer = AutoTokenizer.from_pretrained(
|
| 49 |
+
# MODEL_PATH,
|
| 50 |
+
# subfolder="tokenizer"
|
| 51 |
+
# )
|
| 52 |
+
|
| 53 |
+
# text_encoder = Qwen3ForCausalLM.from_pretrained(
|
| 54 |
+
# MODEL_PATH,
|
| 55 |
+
# subfolder="text_encoder",
|
| 56 |
+
# torch_dtype=DTYPE,
|
| 57 |
+
# )
|
| 58 |
+
|
| 59 |
+
# scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
|
| 60 |
+
# MODEL_PATH,
|
| 61 |
+
# subfolder="scheduler"
|
| 62 |
+
# )
|
| 63 |
+
|
| 64 |
+
# pipe = ZImageControlPipeline(
|
| 65 |
+
# vae=vae,
|
| 66 |
+
# tokenizer=tokenizer,
|
| 67 |
+
# text_encoder=text_encoder,
|
| 68 |
+
# transformer=transformer,
|
| 69 |
+
# scheduler=scheduler,
|
| 70 |
+
# )
|
| 71 |
+
# pipe.to("cuda", DTYPE)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# def prepare(prompt, is_polish_prompt):
|
| 75 |
+
# if not is_polish_prompt: return prompt, False
|
| 76 |
+
# polished_prompt = prompt_utils.polish_prompt(prompt)
|
| 77 |
+
# return polished_prompt, True
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
@spaces.GPU
|
| 81 |
+
def inference(
|
| 82 |
+
prompt,
|
| 83 |
+
negative_prompt,
|
| 84 |
+
seed=42,
|
| 85 |
+
randomize_seed=True,
|
| 86 |
+
guidance_scale=1.5,
|
| 87 |
+
num_inference_steps=8,
|
| 88 |
+
progress=gr.Progress(track_tqdm=True),
|
| 89 |
+
):
|
| 90 |
+
timestamp = time.time()
|
| 91 |
+
print(f"timestamp: {timestamp}")
|
| 92 |
+
|
| 93 |
+
# # process image
|
| 94 |
+
# print("DEBUG: process image")
|
| 95 |
+
# if input_image is None:
|
| 96 |
+
# print("Error: input_image is empty.")
|
| 97 |
+
# return None
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
# print("DEBUG: control_image_torch")
|
| 101 |
+
# orig_width, orig_height = input_image.size
|
| 102 |
+
# control_image, width, height = image_utils.rescale_image(input_image, image_scale, 16, 2048)
|
| 103 |
+
# control_image_torch = image_utils.get_image_latent(control_image, sample_size=[height, width])[:, :, 0]
|
| 104 |
+
|
| 105 |
+
# # generation
|
| 106 |
+
# if randomize_seed: seed = random.randint(0, MAX_SEED)
|
| 107 |
+
# generator = torch.Generator().manual_seed(seed)
|
| 108 |
+
|
| 109 |
+
# output_image = pipe(
|
| 110 |
+
# prompt=prompt,
|
| 111 |
+
# negative_prompt = negative_prompt,
|
| 112 |
+
# width=width,
|
| 113 |
+
# height=height,
|
| 114 |
+
# generator=generator,
|
| 115 |
+
# guidance_scale=guidance_scale,
|
| 116 |
+
# control_image=control_image_torch,
|
| 117 |
+
# num_inference_steps=num_inference_steps,
|
| 118 |
+
# control_context_scale=control_context_scale,
|
| 119 |
+
# ).images[0]
|
| 120 |
+
|
| 121 |
+
# output_image = output_image.resize((orig_width * image_scale, orig_height * image_scale))
|
| 122 |
+
# return output_image, seed
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def read_file(path: str) -> str:
    """Return the full contents of *path*, decoded as UTF-8."""
    with open(path, encoding='utf-8') as fh:
        return fh.read()
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
css = """
|
| 132 |
+
#col-container {
|
| 133 |
+
margin: 0 auto;
|
| 134 |
+
max-width: 960px;
|
| 135 |
+
}
|
| 136 |
+
"""
|
| 137 |
+
|
| 138 |
+
with open('examples/0_examples.json', 'r') as file: examples = json.load(file)
|
| 139 |
+
|
| 140 |
+
with gr.Blocks() as demo:
|
| 141 |
+
with gr.Column(elem_id="col-container"):
|
| 142 |
+
with gr.Column():
|
| 143 |
+
gr.HTML(read_file("static/header.html"))
|
| 144 |
+
with gr.Row():
|
| 145 |
+
with gr.Column():
|
| 146 |
+
|
| 147 |
+
prompt = gr.Textbox(
|
| 148 |
+
label="Prompt",
|
| 149 |
+
show_label=False,
|
| 150 |
+
lines=2,
|
| 151 |
+
placeholder="Enter your prompt",
|
| 152 |
+
value="a man in a fishing boat. high quality, detailed"
|
| 153 |
+
# container=False,
|
| 154 |
+
)
|
| 155 |
+
# is_polish_prompt = gr.Checkbox(label="Polish prompt", value=True)
|
| 156 |
+
# control_mode = gr.Radio(
|
| 157 |
+
# choices=["Canny", "Depth", "HED", "MLSD", "Pose"],
|
| 158 |
+
# value="Canny",
|
| 159 |
+
# label="Control Mode"
|
| 160 |
+
# )
|
| 161 |
+
run_button = gr.Button("Generate", variant="primary")
|
| 162 |
+
with gr.Accordion("Advanced Settings", open=False):
|
| 163 |
+
|
| 164 |
+
negative_prompt = gr.Textbox(
|
| 165 |
+
label="Negative prompt",
|
| 166 |
+
lines=2,
|
| 167 |
+
container=False,
|
| 168 |
+
placeholder="Enter your negative prompt",
|
| 169 |
+
value="blurry, ugly, bad"
|
| 170 |
+
)
|
| 171 |
+
with gr.Row():
|
| 172 |
+
num_inference_steps = gr.Slider(
|
| 173 |
+
label="Steps",
|
| 174 |
+
minimum=1,
|
| 175 |
+
maximum=30,
|
| 176 |
+
step=1,
|
| 177 |
+
value=9,
|
| 178 |
+
)
|
| 179 |
+
control_context_scale = gr.Slider(
|
| 180 |
+
label="Context scale",
|
| 181 |
+
minimum=0.0,
|
| 182 |
+
maximum=1.0,
|
| 183 |
+
step=0.01,
|
| 184 |
+
value=0.75,
|
| 185 |
+
)
|
| 186 |
+
|
| 187 |
+
with gr.Row():
|
| 188 |
+
guidance_scale = gr.Slider(
|
| 189 |
+
label="Guidance scale",
|
| 190 |
+
minimum=0.0,
|
| 191 |
+
maximum=10.0,
|
| 192 |
+
step=0.1,
|
| 193 |
+
value=1.0,
|
| 194 |
+
)
|
| 195 |
+
|
| 196 |
+
seed = gr.Slider(
|
| 197 |
+
label="Seed",
|
| 198 |
+
minimum=0,
|
| 199 |
+
maximum=MAX_SEED,
|
| 200 |
+
step=1,
|
| 201 |
+
value=42,
|
| 202 |
+
)
|
| 203 |
+
randomize_seed = gr.Checkbox(label="Randomize seed", value=False)
|
| 204 |
+
|
| 205 |
+
with gr.Column():
|
| 206 |
+
output_image = gr.Image(label="Generated image", show_label=False)
|
| 207 |
+
# polished_prompt = gr.Textbox(label="Polished prompt", interactive=False)
|
| 208 |
+
|
| 209 |
+
# with gr.Accordion("Preprocessor output", open=False):
|
| 210 |
+
# control_image = gr.Image(label="Control image", show_label=False)
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
# gr.Examples(examples=examples, inputs=[input_image])
|
| 214 |
+
gr.Markdown(read_file("static/footer.md"))
|
| 215 |
+
|
| 216 |
+
run_button.click(
|
| 217 |
+
fn=inference,
|
| 218 |
+
inputs=[
|
| 219 |
+
prompt,
|
| 220 |
+
negative_prompt,
|
| 221 |
+
seed,
|
| 222 |
+
randomize_seed,
|
| 223 |
+
guidance_scale,
|
| 224 |
+
num_inference_steps,
|
| 225 |
+
],
|
| 226 |
+
outputs=[output_image, seed],
|
| 227 |
+
)
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
if __name__ == "__main__":
|
| 231 |
+
demo.launch(mcp_server=True, css=css)
|
examples/0_examples.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
["examples/pose2.jpg", "Woman wearing jeans and tanktop, from dubai", "HED"],
|
| 3 |
+
["examples/bottle.jpg", "A man holding a bottle", "HED"],
|
| 4 |
+
["examples/room.jpg", "modern architecture, living room", "Depth"],
|
| 5 |
+
["examples/pose1.jpg", "A female paladin. Mountain background.", "Pose"],
|
| 6 |
+
["examples/bird.jpg", "A bird sitting on a branch, cartoon.", "Canny"]
|
| 7 |
+
]
|
examples/bird.jpg
ADDED
|
examples/bottle.jpg
ADDED
|
examples/pose1.jpg
ADDED
|
examples/pose2.jpg
ADDED
|
examples/room.jpg
ADDED
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
torch
|
| 3 |
+
transformers
|
| 4 |
+
accelerate
|
| 5 |
+
spaces
|
| 6 |
+
git+https://github.com/huggingface/diffusers.git
|
static/footer.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
## Usage
|
| 3 |
+
- **Polish Prompt**: ZIT needs a detailed prompt, which you can get by enabling polish prompt.
|
| 4 |
+
- **Context Scale**: Similar to strength, the higher the value, the more detail is preserved. The recommended control_context_scale range is 0.65 to 0.80.
|
| 5 |
+
- **Image Scale**: Upscale/downscale image resolution.
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
## References
|
| 9 |
+
- **alibaba-pai**: <https://huggingface.co/alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union>
|
| 10 |
+
- **Tongyi-MAI**: <https://huggingface.co/Tongyi-MAI/Z-Image-Turbo>
|
| 11 |
+
- **VideoX-Fun**: <https://github.com/aigc-apps/VideoX-Fun>
|
| 12 |
+
|
| 13 |
+
<!-- https://github.com/comfyanonymous/ComfyUI/pull/11062 -->
|
static/header.html
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<div style="text-align: center; max-width: 600px; margin: 0 auto;">
|
| 2 |
+
<h1>
|
| 3 |
+
Z-Image Turbo (ZIT) - Upscaler
|
| 4 |
+
</h1>
|
| 5 |
+
<div class="grid-container" >
|
| 6 |
+
<p>
|
| 7 |
+
This project is still in development, but it will be finished very soon!
|
| 8 |
+
<!-- This space still in development, please let me know if there are errors 🙏
|
| 9 |
+
<br>
|
| 10 |
+
Generate an image from a prompt using LongCat-Image, a 6B-parameter model designed for photorealism.
|
| 11 |
+
<br>
|
| 12 |
+
If you like my work, please support me by visiting <a href="https://aisudo.com/" target="_blank">AiSudo</a> 😊 -->
|
| 13 |
+
</div>
|
| 14 |
+
</div>
|
utils/image_utils.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
def rescale_image(img, scale, nearest=32, max_size=1280):
    """Rescale *img* by *scale*, capped at *max_size*, snapped to a multiple of *nearest*.

    Args:
        img: Source PIL image.
        scale: Multiplier applied to both dimensions.
        nearest: Output width/height are floored to a multiple of this value.
        max_size: Maximum allowed width/height before shrinking to fit.

    Returns:
        Tuple of (resized PIL image, new width, new height).
    """
    w, h = img.size
    new_w = int(w * scale)
    new_h = int(h * scale)

    if new_w > max_size or new_h > max_size:
        # Shrink to fit inside max_size while keeping the aspect ratio.
        fit = min(max_size / new_w, max_size / new_h)
        new_w = int(new_w * fit)
        new_h = int(new_h * fit)

    # Snap down to a multiple of `nearest`, but never below `nearest` itself:
    # flooring a small dimension all the way to 0 would make Image.resize fail.
    new_w = max(nearest, (new_w // nearest) * nearest)
    new_h = max(nearest, (new_h // nearest) * nearest)

    return img.resize((new_w, new_h), Image.LANCZOS), new_w, new_h
|
| 21 |
+
|
| 22 |
+
def padding_image(images, new_width, new_height):
    """Letterbox *images* onto a white canvas of size (new_width, new_height).

    The source image is resized to the largest size that fits inside the
    target while keeping its aspect ratio, then pasted centered on a white
    RGB background.

    Args:
        images: Source PIL image.
        new_width: Target canvas width in pixels.
        new_height: Target canvas height in pixels.

    Returns:
        A new PIL RGB image of size (new_width, new_height).
    """
    new_image = Image.new('RGB', (new_width, new_height), (255, 255, 255))

    # NOTE: the original branched on landscape vs. portrait targets, but the
    # two branches were byte-identical — collapsed into one fit computation.
    aspect_ratio = images.width / images.height
    if aspect_ratio > new_width / new_height:
        # Source is relatively wider: width is the limiting dimension.
        new_img_width = new_width
        new_img_height = int(new_img_width / aspect_ratio)
    else:
        # Source is relatively taller (or equal): height limits.
        new_img_height = new_height
        new_img_width = int(new_img_height * aspect_ratio)

    resized_img = images.resize((new_img_width, new_img_height))

    # Center the resized image on the canvas.
    paste_x = (new_width - new_img_width) // 2
    paste_y = (new_height - new_img_height) // 2
    new_image.paste(resized_img, (paste_x, paste_y))

    return new_image
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def get_image_latent(ref_image=None, sample_size=None, padding=False):
    """Convert a reference image into a normalized 5-D torch tensor.

    Accepts a file path, a PIL image, or an array-like. Paths and PIL images
    are converted to RGB, optionally letterboxed, and resized to
    (sample_size[1], sample_size[0]); array-like inputs are converted as-is.

    Args:
        ref_image: File path, PIL image, array-like, or None.
        sample_size: (height, width) target size; required for path/PIL inputs.
        padding: When True, letterbox via padding_image before resizing.

    Returns:
        Float tensor of shape (1, C, 1, H, W) scaled to [0, 1], or None
        when ref_image is None.
    """
    if ref_image is None:
        return None

    # A path is just a lazily-loaded PIL image; normalize to PIL first so the
    # str and PIL branches of the original (which were duplicates) share one
    # code path.
    if isinstance(ref_image, str):
        ref_image = Image.open(ref_image)

    if isinstance(ref_image, Image.Image):
        ref_image = ref_image.convert("RGB")
        if padding:
            ref_image = padding_image(
                ref_image, sample_size[1], sample_size[0])
        ref_image = ref_image.resize((sample_size[1], sample_size[0]))
    # Array-like inputs skip padding/resizing, exactly as before.

    tensor = torch.from_numpy(np.array(ref_image))
    # (H, W, C) -> (1, C, 1, H, W), normalized to [0, 1].
    return tensor.unsqueeze(0).permute([3, 0, 1, 2]).unsqueeze(0) / 255
|
utils/prompt_utils.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from huggingface_hub import InferenceClient
|
| 3 |
+
|
| 4 |
+
# source: https://huggingface.co/spaces/InstantX/Qwen-Image-ControlNet/blob/main/app.py
|
| 5 |
+
def polish_prompt(original_prompt):
|
| 6 |
+
# """Rewrites the prompt using a Hugging Face InferenceClient."""
|
| 7 |
+
magic_prompt = "Ultra HD, 4K, cinematic composition"
|
| 8 |
+
system_prompt = """
|
| 9 |
+
You are a prompt engineering expert for text-to-image models. Since text-to-image models have limited capabilities in understanding user prompts, you need to identify the core theme and intent of the user's input and improve the model's understanding accuracy and generation quality through optimization and rewriting. The rewrite must strictly retain all information from the user's original prompt without deleting or distorting any details.
|
| 10 |
+
Specific requirements are as follows:
|
| 11 |
+
1. The rewrite must not affect any information expressed in the user's original prompt; the rewritten prompt should use coherent natural language, avoid low-information redundant descriptions, and keep the rewritten prompt length as concise as possible.
|
| 12 |
+
2. Ensure consistency between input and output languages: Chinese input yields Chinese output, and English input yields English output. The rewritten token count should not exceed 512.
|
| 13 |
+
3. The rewritten description should further refine subject characteristics and aesthetic techniques appearing in the original prompt, such as lighting and textures.
|
| 14 |
+
4. If the original prompt does not specify an image style, ensure the rewritten prompt uses a **realistic photography style**. If the user specifies a style, retain the user's style.
|
| 15 |
+
5. When the original prompt requires reasoning to clarify user intent, use logical reasoning based on world knowledge to convert vague abstract descriptions into specific tangible objects (e.g., convert "the tallest animal" to "a giraffe").
|
| 16 |
+
6. When the original prompt requires text generation, please use double quotes to enclose the text part (e.g., `"50% OFF"`).
|
| 17 |
+
7. When the original prompt requires generating text-heavy scenes like webpages, logos, UIs, or posters, and no specific text content is specified, you need to infer appropriate text content and enclose it in double quotes. For example, if the user inputs: "A tourism flyer with a grassland theme," it should be rewritten as: "A tourism flyer with the image title 'Grassland'."
|
| 18 |
+
8. When negative words exist in the original prompt, ensure the rewritten prompt does not contain negative words. For example, "a lakeside without boats" should be rewritten such that the word "boat" does not appear at all.
|
| 19 |
+
9. Except for text content explicitly requested by the user, **adding any extra text content is prohibited**.
|
| 20 |
+
Here are examples of rewrites for different types of prompts:
|
| 21 |
+
# Examples (Few-Shot Learning)
|
| 22 |
+
1. User Input: An animal with nine lives.
|
| 23 |
+
Rewrite Output: A cat bathed in soft sunlight, its fur soft and glossy. The background is a comfortable home environment with light from the window filtering through curtains, creating a warm light and shadow effect. The shot uses a medium distance perspective to highlight the cat's leisurely and stretched posture. Light cleverly hits the cat's face, emphasizing its spirited eyes and delicate whiskers, adding depth and affinity to the image.
|
| 24 |
+
2. User Input: Create an anime-style tourism flyer with a grassland theme.
|
| 25 |
+
Rewrite Output: In the lower right of the center, a short-haired girl sits sideways on a gray, irregularly shaped rock. She wears a white short-sleeved dress and brown flat shoes, holding a bunch of small white flowers in her left hand, smiling with her legs hanging naturally. The girl has dark brown shoulder-length hair with bangs covering her forehead, brown eyes, and a slightly open mouth. The rock surface has textures of varying depths. To the girl's left and front is lush grass, with long, yellow-green blades, some glowing golden in the sunlight. The grass extends into the distance, forming rolling green hills that fade in color as they recede. The sky occupies the upper half of the picture, pale blue dotted with a few fluffy white clouds. In the upper left corner, there is a line of text in italic, dark green font reading "Explore Nature's Peace". Colors are dominated by green, blue, and yellow, fluid lines, and distinct light and shadow contrast, creating a quiet and comfortable atmosphere.
|
| 26 |
+
3. User Input: A Christmas sale poster with a red background, promoting a Buy 1 Get 1 Free milk tea offer.
|
| 27 |
+
Rewrite Output: The poster features an overall red tone, embellished with white snowflake patterns on the top and left side. The upper right features a bunch of holly leaves with red berries and a pine cone. In the upper center, golden 3D text reads "Christmas Heartwarming Feedback" centered, along with red bold text "Buy 1 Get 1". Below, two transparent cups filled with bubble tea are placed side by side; the tea is light brown with dark brown pearls scattered at the bottom and middle. Below the cups, white snow piles up, decorated with pine branches, red berries, and pine cones. A blurry Christmas tree is faintly visible in the lower right corner. The image has high clarity, accurate text content, a unified design style, a prominent Christmas theme, and a reasonable layout, providing strong visual appeal.
|
| 28 |
+
4. User Input: A woman indoors shot in natural light, smiling with arms crossed, showing a relaxed and confident posture.
|
| 29 |
+
Rewrite Output: The image features a young Asian woman with long dark brown hair naturally falling over her shoulders, with some strands illuminated by light, showing a soft sheen. Her features are delicate, with long eyebrows, bright and spirited dark brown eyes looking directly at the camera, revealing peace and confidence. She has a high nose bridge, full lips with nude lipstick, and corners of the mouth slightly raised in a faint smile. Her skin is fair, with cheeks and collarbones illuminated by warm light, showing a healthy ruddiness. She wears a black spaghetti strap tank top revealing graceful collarbone lines, and a thin gold necklace with small beads and metal bars glinting in the light. Her outer layer is a beige knitted cardigan, soft in texture with visible knitting patterns on the sleeves. Her arms are crossed over her chest, hands covered by the cardigan sleeves, in a relaxed posture. The background is a pure dark brown without extra decoration, making the figure the absolute focus. The figure is located in the center of the frame. Light enters from the upper right, creating bright spots on her left cheek, neck, and collarbone, while the right side is slightly shadowed, creating a three-dimensional and soft tone. Image details are clear, showcasing skin texture, hair, and clothing materials well. Colors are dominated by warm tones, with the combination of beige and dark brown creating a warm and comfortable atmosphere. The overall style is natural, elegant, and artistic.
|
| 30 |
+
5. User Input: Create a series of images showing the growth process of an apple from seed to fruit. The series should include four stages: 1. Sowing, 2. Seedling growth, 3. Plant maturity, 4. Fruit harvesting.
|
| 31 |
+
Rewrite Output: A 4-panel exquisite illustration depicting the growth process of an apple, capturing each stage precisely and clearly. 1. "Sowing": A close-up shot of a hand gently placing a small apple seed into fertile dark soil, with visible soil texture and the seed's smooth surface. The background is a soft-focus garden dotted with green leaves and sunlight filtering through. 2. "Seedling Growth": A young apple sapling breaks through the soil, stretching tender green leaves toward the sky. The scene is set in a vibrant garden illuminated by warm golden light, highlighting the seedling's delicate structure. 3. "Plant Maturity": A mature apple tree, lush with branches and leaves, covered in tender green foliage and developing small apples. The background is a vibrant orchard under a clear blue sky, with dappled sunlight creating a peaceful atmosphere. 4. "Fruit Harvesting": A hand reaches into the tree to pick a ripe red apple, its smooth skin glistening in the sun. The scene shows the abundance of the orchard, with baskets of apples in the background, giving a sense of fulfillment. Each illustration uses a realistic style, focusing on details and harmonious colors to showcase the natural beauty and development of the apple's life cycle.
|
| 32 |
+
6. User Input: If 1 represents red, 2 represents green, 3 represents purple, and 4 represents yellow, please generate a four-color rainbow based on this rule. The color order from top to bottom is 3142.
|
| 33 |
+
Rewrite Output: The image consists of four horizontally arranged colored stripes, ordered from top to bottom as purple, red, yellow, and green. A white number is centered on each stripe. The top purple stripe features the number "3", the red stripe below it has the number "1", the yellow stripe further down has the number "4", and the bottom green stripe has the number "2". All numbers use a sans-serif font in pure white, forming a sharp contrast with the background colors to ensure good readability. The stripes have high color saturation and a slight texture. The overall layout is simple and clear, with distinct visual effects and no extra decorative elements, emphasizing the numerical information. The image is high definition, with accurate colors and a consistent style, offering strong visual appeal.
|
| 34 |
+
7. User Input: A stone tablet carved with "Guan Guan Ju Jiu, On the River Isle", natural light, background is a Chinese garden.
|
| 35 |
+
Rewrite Output: An ancient stone tablet carved with "Guan Guan Ju Jiu, On the River Isle", the surface covered with traces of time, the writing clear and deep. Natural light falls from above, softly illuminating every detail of the stone tablet and enhancing its sense of history. The background is an elegant Chinese garden featuring lush bamboo forests, winding paths, and quiet pools, creating a serene and distant atmosphere. The overall picture uses a realistic style with rich details and natural light and shadow effects, highlighting the cultural heritage of the stone tablet and the classical beauty of the garden.
|
| 36 |
+
# Output Format
|
| 37 |
+
Please directly output the rewritten and optimized Prompt content. Do not include any explanatory language or JSON formatting, and do not add opening or closing quotes yourself.
|
| 38 |
+
"""
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
api_key = os.environ.get("HF_TOKEN")
|
| 43 |
+
if not api_key:
|
| 44 |
+
print("Warning: HF_TOKEN is not set. Prompt enhancement is disabled.")
|
| 45 |
+
return original_prompt
|
| 46 |
+
|
| 47 |
+
if not original_prompt:
|
| 48 |
+
return magic_prompt
|
| 49 |
+
# client = InferenceClient(provider="cerebras", api_key=api_key)
|
| 50 |
+
# messages = []
|
| 51 |
+
client = InferenceClient(provider="nebius", api_key=api_key)
|
| 52 |
+
|
| 53 |
+
try:
|
| 54 |
+
completion = client.chat.completions.create(
|
| 55 |
+
model="Qwen/Qwen3-Coder-30B-A3B-Instruct",
|
| 56 |
+
max_tokens=256,
|
| 57 |
+
messages=[
|
| 58 |
+
{"role": "system", "content": system_prompt},
|
| 59 |
+
{"role": "user", "content": original_prompt}
|
| 60 |
+
],
|
| 61 |
+
)
|
| 62 |
+
# completion = client.chat.completions.create(
|
| 63 |
+
# model="Qwen/Qwen3-235B-A22B-Instruct-2507", messages=messages
|
| 64 |
+
# )
|
| 65 |
+
polished_prompt = completion.choices[0].message.content
|
| 66 |
+
# polished_prompt += f" {magic_prompt}"
|
| 67 |
+
return polished_prompt.strip().replace("\n", " ")
|
| 68 |
+
except Exception as e:
|
| 69 |
+
print(f"Error during prompt enhancement: {e}")
|
| 70 |
+
return original_prompt
|
| 71 |
+
|
utils/repo_utils.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, subprocess
|
| 2 |
+
|
| 3 |
+
def clone_repo_if_not_exists(git_url, git_dir):
    """Clone a Git repository under ~/<git_dir> unless it is already present.

    Args:
        git_url: URL of the repository (with or without a ".git" suffix).
        git_dir: Directory, relative to the user's home, that holds clones.
    """
    home_dir = os.path.expanduser("~")
    models_dir = os.path.join(home_dir, git_dir)

    # Derive the checkout folder name from the URL, dropping a ".git" suffix.
    git_name = git_url.split('/')[-1]
    if git_name.endswith(".git"):
        git_name = git_name[:-4]

    repo_path = os.path.join(models_dir, git_name)

    if not os.path.exists(models_dir):
        # exist_ok guards the check-then-create race of the original code.
        os.makedirs(models_dir, exist_ok=True)
        print(f"Created directory: {models_dir}")

    if not os.path.exists(repo_path):
        print(f"Repository '{git_name}' not found in '{models_dir}'. Cloning from {git_url}...")
        try:
            # List-form argv (shell=False) — git_url is never shell-interpreted.
            subprocess.run(["git", "clone", git_url, repo_path], check=True)
            print(f"Successfully cloned '{git_name}' to '{repo_path}'.")
        except subprocess.CalledProcessError as e:
            print(f"Error cloning repository: {e}")
        except FileNotFoundError:
            print("Error: 'git' command not found. Please ensure Git is installed and in your PATH.")
    else:
        print(f"Repository '{git_name}' already exists at '{repo_path}'. Skipping clone.")
|
| 33 |
+
|