Update app.py
app.py
CHANGED
@@ -34,34 +34,25 @@ from gtts import gTTS
 from pydub import AudioSegment
 import textwrap

-
 # Initialize FLUX pipeline only if CUDA is available
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"

-
-
-
-
-
-
-
-    return None
-
-flux_pipe = None  # Do not load at startup
+if device == "cuda":
+    flux_pipe = DiffusionPipeline.from_pretrained(
+        "black-forest-labs/FLUX.1-schnell",
+        torch_dtype=dtype
+    ).to(device)
+else:
+    flux_pipe = None  # Avoid initializing the model when CUDA is unavailable

 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048

 nltk.download('punkt')

-
 # Ensure proper multiprocessing start method
-try:
-    multiprocessing.set_start_method("spawn", force=True)
-except RuntimeError:
-    pass  # Ignore errors if the start method is already set
-
+multiprocessing.set_start_method("spawn", force=True)

 # Download necessary NLTK data
 def setup_nltk():

@@ -79,7 +70,7 @@ DESCRIPTION = (
 TITLE = "Video Story Generator with Audio by using FLUX, distilbart, and GTTS."

 # Load Tokenizer and Model for Text Summarization
-def load_text_summarization_model_v1():
+def load_text_summarization_model():
     """Load the tokenizer and model for text summarization."""
     print("Loading text summarization model...")
     tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")

@@ -89,25 +80,6 @@ def load_text_summarization_model_v1():
     model.to(device)
     return tokenizer, model, device

-def load_text_summarization_model():
-    """Load the tokenizer and model for text summarization without triggering CUDA init."""
-    print("Loading text summarization model...")
-
-    if "SPACE_ID" in os.environ:  # Detect if running in Hugging Face Spaces
-        os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Prevent CUDA initialization
-
-    tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
-    model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")
-
-    if torch.cuda.is_available() and "SPACE_ID" not in os.environ:
-        device = torch.device("cuda:0")
-    else:
-        device = torch.device("cpu")
-
-    print(f"Using device: {device}")
-    model.to(device)
-    return tokenizer, model, device
-
 tokenizer, model, device = load_text_summarization_model()

 # Log GPU Memory (optional, for debugging)

@@ -130,8 +102,8 @@ def check_gpu_availability():

 check_gpu_availability()

-
-def generate_image_with_flux_old(
+@spaces.GPU()
+def generate_image_with_flux(
     text: str,
     seed: int = 42,
     width: int = 1024,

@@ -169,45 +141,6 @@ def generate_image_with_flux_old(
     print("DEBUG: Image generated successfully.")
     return image

-
-
-@spaces.GPU()
-def generate_image_with_flux(
-    text: str,
-    seed: int = 42,
-    width: int = 1024,
-    height: int = 1024,
-    num_inference_steps: int = 4,
-    randomize_seed: bool = True
-):
-    print(f"DEBUG: Generating image with FLUX for text: '{text}'")
-
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-
-    generator = torch.Generator().manual_seed(seed)
-
-    # Load FLUX pipeline only when needed
-    global flux_pipe
-    if flux_pipe is None:
-        flux_pipe = get_flux_pipeline()  # Delayed initialization
-
-    if flux_pipe is None:
-        raise RuntimeError("FLUX pipeline is not available. Check CUDA or environment settings.")
-
-    image = flux_pipe(
-        prompt=text,
-        width=width,
-        height=height,
-        num_inference_steps=num_inference_steps,
-        generator=generator,
-        guidance_scale=0.0
-    ).images[0]
-
-    print("DEBUG: Image generated successfully.")
-    return image
-
-
 # --------- End of MinDalle Functions ---------
 # Merge audio files

@@ -451,6 +384,4 @@ with demo:
 )

 # Launch the Gradio app
-
-
-demo.launch(debug=True, share="SPACE_ID" in os.environ)
+demo.launch(debug=True, share=False)
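The diff shows the new module-level FLUX initialization and the signature of the ZeroGPU-decorated generate_image_with_flux, but the updated function body (new lines 110-140) is unchanged context and not displayed. The snippet below is only a minimal sketch of how those visible pieces are likely to fit together, assuming the new body mirrors the removed lazy-loading version with the get_flux_pipeline() call dropped; the extra parameters (height, num_inference_steps, randomize_seed) are carried over from the removed signature and are an assumption, not the committed code.

import random
import numpy as np
import torch
import spaces
from diffusers import DiffusionPipeline

dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# Module-level pipeline, as added in the hunk at new lines 41-47.
if device == "cuda":
    flux_pipe = DiffusionPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-schnell", torch_dtype=dtype
    ).to(device)
else:
    flux_pipe = None  # CUDA unavailable: no image model is loaded

MAX_SEED = np.iinfo(np.int32).max

@spaces.GPU()
def generate_image_with_flux(
    text: str,
    seed: int = 42,
    width: int = 1024,
    height: int = 1024,
    num_inference_steps: int = 4,
    randomize_seed: bool = True,
):
    # Assumed body: same flow as the removed version, but relying on the
    # module-level flux_pipe instead of a lazy get_flux_pipeline() call.
    if flux_pipe is None:
        raise RuntimeError("FLUX pipeline is not available. Check CUDA or environment settings.")
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator().manual_seed(seed)
    image = flux_pipe(
        prompt=text,
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
        generator=generator,
        guidance_scale=0.0,  # FLUX.1-schnell is distilled for guidance-free sampling
    ).images[0]
    return image

A hypothetical call from a Gradio handler would then be as simple as image = generate_image_with_flux("a cat reading a book under a tree").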