import torch
import torch._dynamo
import os
import torch.nn.functional as F
from PIL import Image
from pipelines.models import TextToImageRequest
from torch import Generator
from typing import Type
from diffusers import DiffusionPipeline, FluxTransformer2DModel
from huggingface_hub.constants import HF_HUB_CACHE
from transformers import T5EncoderModel

# Process-wide environment knobs, set before any model is loaded.
# NOTE(review): these are assigned after `import torch`; presumably that is
# still early enough because nothing touches CUDA until load_pipeline() runs
# -- confirm.
os.environ.update(
    {
        # Option string for PyTorch's CUDA caching allocator.
        "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
        # Read by the Hugging Face tokenizers library.
        "TOKENIZERS_PARALLELISM": "True",
    }
)

# TorchDynamo: suppress compilation errors instead of raising them. No
# torch.compile call is visible in this file, so this only matters if a
# dependency compiles something.
torch._dynamo.config.suppress_errors = True

# Backend switches: permit TF32 matmuls and keep cuDNN enabled.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.enabled = True

# Placeholder used as the pipeline type in the annotations below. It is bound
# to None, so those annotations carry no real type information.
Pipeline = None


def load_pipeline() -> Pipeline:
    """Build the FLUX.1-schnell pipeline on CUDA and warm it up with one call.

    The base pipeline is loaded from ``black-forest-labs/FLUX.1-schnell`` at a
    pinned revision. Its ``transformer`` and ``text_encoder_2`` components are
    replaced with weights from the pinned ``strong943/autoencoder-tiny``
    snapshot. Everything is loaded in bfloat16.

    The transformer is read from the local hub cache directory. If that
    directory is missing (cold cache), only the ``transformer/`` folder of the
    pinned snapshot is downloaded first, instead of failing on a path that
    nothing else in this function populates.

    Returns:
        The assembled pipeline, moved to ``"cuda"`` and already invoked once
        with the same step count, guidance scale and sequence length that
        ``infer`` uses.
    """
    base_repo = "black-forest-labs/FLUX.1-schnell"
    base_revision = "741f7c3ce8b383c54771c7003378a50191e9efe9"
    weights_repo = "strong943/autoencoder-tiny"
    weights_revision = "33e36134bd12b626986cfc1fee662a82976c6d24"

    text_encoder_2 = T5EncoderModel.from_pretrained(
        weights_repo,
        revision=weights_revision,
        subfolder="text_encoder_2",
        torch_dtype=torch.bfloat16,
    )

    # Same location the hub cache uses for this repo/revision.
    transformer_dir = os.path.join(
        HF_HUB_CACHE,
        f"models--strong943--autoencoder-tiny/snapshots/{weights_revision}/transformer",
    )
    if not os.path.isdir(transformer_dir):
        # Cold cache: fetch just the transformer folder of the pinned
        # snapshot. When the directory already exists this branch is skipped
        # and no network access is attempted.
        from huggingface_hub import snapshot_download

        snapshot_dir = snapshot_download(
            weights_repo,
            revision=weights_revision,
            allow_patterns=["transformer/*"],
        )
        transformer_dir = os.path.join(snapshot_dir, "transformer")

    # NOTE(review): use_safetensors=False opts out of safetensors weight
    # files, which presumably means pickle-based checkpoints are read. That
    # is only reasonable because the repo and revision are pinned -- confirm
    # the source is trusted.
    transformer = FluxTransformer2DModel.from_pretrained(
        transformer_dir, torch_dtype=torch.bfloat16, use_safetensors=False
    )

    pipeline = DiffusionPipeline.from_pretrained(
        base_repo,
        revision=base_revision,
        transformer=transformer,
        text_encoder_2=text_encoder_2,
        torch_dtype=torch.bfloat16,
    )
    pipeline.to("cuda")
    # NOTE(review): DiffusionPipeline.to appears to act only on device/dtype,
    # so memory_format may be silently ignored here -- confirm against the
    # installed diffusers version before relying on channels_last.
    pipeline.to(memory_format=torch.channels_last)

    # Warm-up run; the generated image is discarded.
    with torch.inference_mode():
        pipeline(
            prompt="oblivious, drumlet, earthen, bioelectric, radiograph, kinesis, subcortical, cytoplasmic",
            width=1024,
            height=1024,
            guidance_scale=0.0,
            num_inference_steps=4,
            max_sequence_length=256,
        )
    return pipeline


@torch.no_grad()
def infer(
    request: TextToImageRequest, pipeline: Pipeline, generator: Generator
) -> Image.Image:
    """Generate one image for ``request`` with the given pipeline.

    Runs under ``torch.no_grad()``. The pipeline is called with fixed
    sampling settings (guidance scale 0.0, 4 inference steps, maximum
    sequence length 256) and asked for PIL output.

    Args:
        request: Source of ``prompt``, ``height`` and ``width`` for the call.
        pipeline: Callable pipeline, as returned by ``load_pipeline``.
        generator: Random generator forwarded to the pipeline call.

    Returns:
        The first entry of the pipeline result's ``images``.
    """
    result = pipeline(
        request.prompt,
        generator=generator,
        guidance_scale=0.0,
        num_inference_steps=4,
        max_sequence_length=256,
        height=request.height,
        width=request.width,
        output_type="pil",
    )
    return result.images[0]