from llama_cpp import Llama
tags_str= "chubby boy,asian,china,boy,shota,teen,dark_skin,(fat:1.2), penis,round face,standing,highres,realistic,real,photo,full_shot, penis"
llm = Llama(
    model_path="./models/text_encoders/Qwen3-4B-Instruct-2507-Q4_0.gguf",
    chat_format="qwen",  # llama-cpp-python 
    verbose=False
)

messages = [
    {"role": "system", "content": "You are an expert prompt engineer for the FLUX.1 image generation model."},
    {"role": "user", "content":f"Convert the following comma-separated tags into a single, detailed, and vivid natural language paragraph. "
            f"Focus on describing the subject, action, environment, lighting, and camera angle. "
            f"Do not output any extra text, explanations, or markdown formatting. Output ONLY the prompt string."
            f"\nTags: {tags_str}"}
]
print("starting!!!")
for i in range(5):    
    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7
    )
    #print("response+++",response)
    print("response2+++",response['choices'][0]['message']['content'])