File size: 2,437 Bytes
b6b44ab
 
 
77f1528
b6b44ab
 
 
 
 
 
 
 
 
 
 
303ab82
b6b44ab
 
 
f66a0cd
 
8c7686d
 
 
b6b44ab
 
 
8c7686d
 
303ab82
 
b6b44ab
8c7686d
b6b44ab
8c7686d
303ab82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6b44ab
 
 
 
 
 
 
 
 
 
 
8c7686d
b6b44ab
77f1528
b6b44ab
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import torch
import torch.nn as nn
from transformers import PreTrainedModel
from .configuration_pixel import TopAIImageConfig

class ResidualBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.block = nn.Sequential(
            nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(channels),
            nn.ReLU(True),
            nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(channels)
        )

    def forward(self, x):
        return x + self.block(x)

class TopAIImageGenerator(PreTrainedModel):
    """Text-conditioned image decoder.

    Projects a text embedding to a 4x4 feature map with ``hidden_dim``
    channels, then upsamples it through transposed-conv stages and a
    residual block to an image with ``config.image_channels`` channels,
    with values in [-1, 1] (Tanh output).

    NOTE(review): the decoder layer order below was hand-matched to a
    Safetensors checkpoint — the Sequential indices define the
    state_dict keys, so the composition must not be reordered.
    """
    config_class = TopAIImageConfig
    
    # Workaround for an AttributeError inside transformers' weight-tying
    # machinery: it expects this attribute to be a dict (i.e. to have a
    # .keys() method). An empty dict declares that no weights are tied.
    all_tied_weights_keys = {} 

    def __init__(self, config) -> None:
        super().__init__(config)
        # hidden_dim taken from the config (512 per the original comment)
        h = config.hidden_dim 
        
        # Maps the text embedding to a flattened 4x4 grid of h channels;
        # forward() reshapes this to (batch, h, 4, 4).
        self.text_projection = nn.Linear(config.input_dim, 4 * 4 * h)
        
        # Built dynamically so each layer's shape matches the weights in
        # the Safetensors checkpoint exactly.
        self.decoder = nn.Sequential(
            # Layer 0: h -> h (this is where the shape mismatch used to occur)
            self._upsample(h, h), 
            # Layer 1
            ResidualBlock(h),
            # Layer 2: h -> 256
            self._upsample(h, 256),
            # Layer 3
            ResidualBlock(256),
            # Layer 4: 256 -> 128
            self._upsample(256, 128),
            # Layer 5: 128 -> 64
            self._upsample(128, 64),
            # Layer 6: final transition down to 32 filters
            nn.ConvTranspose2d(64, 32, 4, 2, 1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(True),
            # Layer 9: convert to image channels (RGB); Tanh bounds output to [-1, 1]
            nn.Conv2d(32, config.image_channels, kernel_size=3, padding=1),
            nn.Tanh()
        )

    def _upsample(self, i: int, o: int) -> nn.Sequential:
        # One 2x spatial upsampling stage: transposed conv (stride 2) + BN + ReLU.
        return nn.Sequential(
            nn.ConvTranspose2d(i, o, 4, 2, 1, bias=False),
            nn.BatchNorm2d(o),
            nn.ReLU(True)
        )

    def forward(self, text_embeddings: torch.Tensor) -> torch.Tensor:
        # Project the text embedding and reshape it into the initial
        # (batch, hidden_dim, 4, 4) feature map, then decode to an image.
        x = self.text_projection(text_embeddings)
        x = x.view(-1, self.config.hidden_dim, 4, 4)
        return self.decoder(x)