# Source: smgp/src/utils/image_generation.py
# Author: muhammadmaazuddin
# Commit message: worked on social media agent
# Commit: 339c417
import os
import re
from google import genai
from huggingface_hub import InferenceClient
from dotenv import load_dotenv, find_dotenv
from pydantic import BaseModel, Field, ConfigDict
from langchain_core.output_parsers import JsonOutputParser
import json
import time
import fal_client
from PIL import Image
from io import BytesIO
from IPython.display import display
import asyncio
# Load variables from the .env file located by find_dotenv() into the process
# environment. This runs at import time and must stay ahead of any code that
# reads os.environ. The boolean result is bound to "_" and otherwise unused.
_: bool = load_dotenv(find_dotenv())
# Module-level Google GenAI client, constructed with no explicit arguments.
# NOTE(review): presumably it takes its credentials from the environment
# loaded above — confirm against the google-genai documentation.
client = genai.Client(
# Disabled Vertex AI configuration, kept for reference:
# vertexai=True, project='1089055075981', location='us-central1'
)
# Pixel dimensions, as (width, height), keyed by the aspect-ratio label
# callers pass around as a string.
aspect_ratios = {
    # Square LinkedIn post.
    "1:1": (1200, 1200),
    # Horizontal LinkedIn post.
    # NOTE(review): 1200x627 is roughly 1.91:1 rather than an exact 16:9 —
    # confirm this size is intentional.
    "16:9": (1200, 627),
}
# Module-level Hugging Face inference client routed through the "replicate"
# provider. os.environ["HF_TOKEN"] is looked up at import time, so importing
# this module raises KeyError when HF_TOKEN is not set.
# NOTE(review): in the portion of the file reviewed, hf_client is referenced
# only from commented-out code — confirm it is still needed.
hf_client = InferenceClient(
provider="replicate",
api_key=os.environ["HF_TOKEN"],
)
# JSON template for a social-media post design spec, grouped into meta, brand,
# content, visuals, design and finishing sections. Values written as
# "A | B | C" appear to enumerate the allowed alternatives for that field.
# NOTE(review): the "_old" suffix, and the lack of any reference to this name
# in the portion of the file reviewed, suggest it is kept only for reference —
# confirm before removing.
post_schema_old = """{
"meta": {
"platform": "LinkedIn | Instagram | Twitter | TikTok",
"aspectRatio": "1:1 | 16:9 | 9:16 | 4:5",
"style": "Modern | Minimal | Bold | Corporate | Playful"
},
"brand": {
"company": "Company Name",
"logo": "Include | Exclude",
"colors": ["#HEX1", "#HEX2", "#HEX3"]
},
"content": {
"headline": "Main text (max 8 words)",
"subtext": "Optional supporting line (max 15 words)",
"tone": "Professional | Inspirational | Energetic | Friendly"
},
"visuals": {
"primarySubject": "Headline with abstract icons | Infographic shapes | Product mockup",
"elements": ["Icons", "Illustrations", "Charts", "Abstract shapes"],
"composition": "Centered | Rule of Thirds | Balanced layout"
},
"design": {
"typography": "Bold, clean, sans-serif | Minimal serif",
"contrast": "High | Medium",
"background": "Solid color | Gradient | Abstract"
},
"finishing": {
"quality": "Ultra-sharp, high-resolution",
"effects": "Subtle shadows | Soft gradients | None"
}
}
"""
# Annotated template describing the structure of an image design spec, with
# sections for meta, camera, subject, character, composition, setting,
# lighting, style, rendering and colorPalette. Values are "..." placeholders
# and most fields carry a trailing "//" note explaining what belongs there, so
# the text is not strict JSON as written.
# Braces are doubled ("{{" / "}}").
# NOTE(review): presumably this is so the text can be embedded in a
# str.format-style prompt template, where doubled braces collapse to single
# ones — confirm against the code that consumes this string.
normal_schema = """
{{
"meta": {{
"styleName": "...", // A unique, descriptive name for this specific image style or preset (e.g., "Ethereal Forest Magic", "Cyberpunk Noir Alley").
"aspectRatio": "...", // The proportional relationship between the width and height of the image (e.g., "16:9", "1:1", "4:5", "21:9").
"promptPrefix": "..." // Optional text to prepend to a generated prompt, like a file name, a version number, or a specific trigger word.
}},
"camera": {{
"model": "...", // Describes the camera, lens, or artistic medium used (e.g., "DSLR", "iPhone 15 Pro", "8x10 view camera", "Watercolor on cold-press paper", "3D render in Blender").
"focalLength": "...", // The lens's focal length, which affects the field of view and perspective distortion (e.g., "16mm wide-angle", "85mm portrait", "200mm telephoto", "Isometric perspective").
"angle": "...", // The camera's angle relative to the main subject or scene (e.g., "eye-level", "high-angle", "dutch angle", "drone shot", "worm's-eye view").
"type": "..." // The genre or type of photography or art style (e.g., "macro photography", "landscape", "fantasy illustration", "architectural rendering", "abstract art").
}},
"subject": {{
"primary": "...", // The main focal point or subject of the image (e.g., "a majestic mountain range", "a lone wolf", "an ancient wizard", "a futuristic cityscape", "an abstract shape").
"emotion": "...", // The dominant emotion or mood conveyed by the subject or the overall scene (e.g., "serene and peaceful", "joyful", "melancholy", "menacing", "awe-inspiring").
"pose": "...", // The posture, action, or arrangement of the subject(s) (e.g., "running towards the camera", "sitting in quiet contemplation", "a winding river", "a chaotic explosion").
"gaze": "..." // The direction of the subject's gaze or the directional focus of the composition (e.g., "looking off-camera", "breaking the fourth wall", "facing away from the viewer", "pointing towards the horizon").
}},
"character": {{
"appearance": "...", // Detailed physical description of a character or key object (e.g., "weathered face with a long white beard", "sleek, chrome-plated robot", "moss-covered ancient tree").
"wardrobe": "...", // Clothing, armor, or any form of covering on the subject (e.g., "ornate golden armor", "tattered rags", "a vibrant kimono", "a car's glossy paint job").
"accessories": "..." // Additional items worn by or associated with the subject (e.g., "a magical amulet", "cybernetic implants", "a pair of glasses", "a sword and shield").
}},
"composition": {{
"theory": "...", // The compositional rules or theories applied (e.g., "rule of thirds", "golden ratio", "leading lines", "symmetrical balance", "negative space").
"visualHierarchy": "..." // Describes the order in which the viewer's eye is drawn to different elements in the scene, from most to least prominent.
}},
"setting": {{
"environment": "...", // The general environment or location of the scene (e.g., "a mystical forest", "a bustling cyberpunk city", "a tranquil beach at sunset", "a minimalist white room", "the surface of Mars").
"architecture": "...", // Describes any buildings, ruins, or significant natural structures (e.g., "gothic cathedrals", "brutalist architecture", "alien monoliths", "towering rock formations").
"furniture": "..." // Key objects, props, or furniture within the setting that add context or detail (e.g., "a single throne", "scattered futuristic debris", "a rustic wooden fence").
}},
"lighting": {{
"source": "...", // The primary source of light in the scene (e.g., "dramatic moonlight", "soft window light", "flickering candlelight", "neon signs", "magical glow").
"direction": "...", // The direction from which the light originates (e.g., "backlighting", "rim lighting", "top-down light", "light from below").
"quality": "..." // The quality and characteristics of the light and shadows (e.g., "soft and diffused", "hard and high-contrast", "dappled", "volumetric light rays", "caustic reflections").
}},
"style": {{
"artDirection": "...", // The overarching artistic style, movement, or influence (e.g., "impressionism", "art deco", "cyberpunk", "vaporwave", "ghibli-inspired", "cinematic").
"mood": "..." // The overall mood, feeling, or atmosphere of the image (e.g., "ethereal and dreamy", "dystopian and gritty", "whimsical and cheerful", "epic and dramatic").
}},
"rendering": {{
"engine": "...", // The rendering engine, technique, or medium used to create the final image (e.g., "Octane Render", "oil painting", "cross-hatching", "pixel art", "Unreal Engine 5").
"fidelitySpec": "...", // Specific details about the image's texture and fidelity (e.g., "heavy film grain", "sharp digital focus", "visible brushstrokes", "chromatic aberration", "lens flare").
"postProcessing": "..." // Any post-processing or finishing effects applied (e.g., "color grading with a teal and orange look", "vignette", "bloom and glare", "a vintage photo filter").
}},
"colorPalette": {{
"primaryColors": [ // The most dominant colors that define the overall color scheme of the image.
{{ "name": "...", "hex": "...", "percentage": "..." }},
{{ "name": "...", "hex": "...", "percentage": "..." }}
],
"accentColors": [ // Complementary or contrasting colors used for emphasis, detail, or highlights.
{{ "name": "...", "hex": "...", "percentage": "..." }},
{{ "name": "...", "hex": "...", "percentage": "..." }}
]
}}
}}
"""
# async def generate(
# design_brief: str,
# output_dir: str = "images",
# aspectRatio: str = "1:1",
# num_images: int = 1,
# ):
# """Orchestrates the process of generating JSON and then generating images."""
# os.makedirs(output_dir, exist_ok=True)
# designSpec = generate_designSpec_from_brief(design_brief,f"aspect_ratio:{aspectRatio}")
# await generate_images(
# file_name='image-'+time.strftime("%Y%m%d-%H%M%S"),
# idea=design_brief,
# prompt=,
# aspectRatio="1:1",
# output_dir=output_dir,
# num_images=num_images,
# )
if __name__ == "__main__":
    # Sample prompts kept for manual experimentation. Every entry is currently
    # commented out, so the list is empty at runtime and nothing reads it.
    ideas = [
        # "A energy drink with water drops on it, ultra realistic, for a commercial.",
        # "Graffiti with the text 'JSON Schema' on a brick wall.",
        # "A LEGO knight fighting a huge, fire-breathing dragon on a castle wall.",
        # "A stylish woman sipping coffee at a Parisian cafe, with the Eiffel Tower in the background. Shot in golden hour.",
        # "An emotional, close-up portrait of an old fisherman.",
        # "A vast, alien landscape on a distant planet with two suns, strange, towering rock formations, and bioluminescent plants. Epic sci-fi concept art.",
        # "A whimsical illustration of a friendly fox reading a book in a cozy, cluttered library. The text 'The Midnight Reader' should be subtly integrated on a book spine.",
        # "A magical man with sparkling pink hair and large from an anime.",
        # "A cartoon robot waving happily, with a simple, bold outline and bright, flat colors. ",
        # "A full-body character sheet of a realistic pirate captain, showing front, back, and side views.",
    ]

    # Brief handed to the generator. The text is passed through verbatim,
    # including its leading and trailing newlines.
    linkedin_brief = """
Create a media for posting on linkedin plateform, make it modren and professional.
companydetails:
Design Atom is a subscription-based design service tailored for AI and tech startups, offering comprehensive product design, branding, and web development solutions. With a fixed monthly rate, dedicated designers, and fast 4-hour average turnaround times, they have completed over 200 projects, helped clients raise $45M+, and reached 100M+ users with their designs. Trusted by 45+ global companies, including Y Combinator-backed startups, Design Atom delivers startup-proven results, streamlining UX, boosting retention, and enhancing conversions. Their free design trial eliminates bottlenecks, supporting startups in building investor-ready, user-friendly products.
"""

    # NOTE(review): in the portion of the file reviewed, `generate` exists only
    # as commented-out code, and that version names its first parameter
    # `design_brief` rather than `idea`. Confirm a live `generate(idea=...,
    # aspectRatio=...)` coroutine is in scope, otherwise this raises NameError.
    asyncio.run(generate(idea=linkedin_brief, aspectRatio="1:1"))
# response = client.models.generate_images(
# model="imagen-4.0-generate-preview-06-06",
# prompt=prompt,
# config=genai.types.GenerateImagesConfig(
# number_of_images=num_images,
# aspect_ratio=aspectRatio,
# ),
# )
# width, height = aspect_ratios[aspectRatio]
# print(width, height)
# response = hf_client.text_to_image(
# prompt=prompt,
# negative_prompt="social media logos, LinkedIn icon, Instagram logo, Twitter logo, browser bars, fake UI, buttons, links, gibberish text, distorted fonts",
# model="Qwen/Qwen-Image",
# width=width,
# height=height,
# guidance_scale=10.0,
# )
# print(response)
# response = client.models.generate_content(
# model="gemini-2.5-flash-image-preview",
# contents=prompt,
# )