Assignment_1_Testing

Sleeping

App Files Files Community

LinkLinkWu committed on May 1, 2025

Commit

9634e12

verified ·

1 Parent(s): 79a38ee

Update func.py

Browse files

Files changed (1) hide show

func.py +43 -27

func.py CHANGED Viewed

@@ -33,34 +33,27 @@ def img2text(img: Union[Image.Image, str, Path]) -> str:
     return _get_captioner()(img)[0]["generated_text"]
 # -------------------------------------------------------------------
-# Step 2.  Caption ➜ Children’s story   (DeepSeek-R1 1.5 B)
 # -------------------------------------------------------------------
 import torch, re
-from transformers import pipeline
-_GEN_MODEL   = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
 _PROMPT_TMPL = (
     "Write a funny and warm children's story (50-100 words) for ages 3-10, "
     "fully and strictly based on this scene: {caption}\nStory:"
 )
-_generator = None
-def _get_generator():
-    """Lazy-load DeepSeek generator once (GPU if available)."""
-    global _generator
-    if _generator is None:
-        _generator = pipeline(
-            "text-generation",
-            model=_GEN_MODEL,
-            device=0 if torch.cuda.is_available() else -1,
-            max_new_tokens=150,
-            do_sample=True,
-            top_p=0.9,
-            temperature=0.8,
-            no_repeat_ngram_size=4,    # ← block 4-gram repeats
-            repetition_penalty=1.15    # ← soften copy-loops
-        )
-    return _generator
 def _dedup_sentences(text: str) -> str:
@@ -76,7 +69,7 @@ def _dedup_sentences(text: str) -> str:
 def text2story(caption: str) -> str:
     """
-    Generate a ≤100-word children’s story from the image caption.
     Args:
         caption: scene description string.
@@ -85,15 +78,38 @@ def text2story(caption: str) -> str:
         Story text (plain string, ≤100 words, no exact duplicate sentences).
     """
     prompt = _PROMPT_TMPL.format(caption=caption)
-    raw    = _get_generator()(prompt, return_full_text=False)[0]["generated_text"]
-    story  = _dedup_sentences(raw)
-    # ensure ending punctuation
     if story and story[-1] not in ".!?":
         story += "."
-    # hard cap at 100 words
     return " ".join(story.split()[:100])
 # Step 3. Text to Audio

     return _get_captioner()(img)[0]["generated_text"]
 # -------------------------------------------------------------------
+# Step 2. Caption ➜ Children’s story (BLOOM-560M)
 # -------------------------------------------------------------------
 import torch, re
+from transformers import AutoTokenizer, AutoModelForCausalLM
+# Prompt template for story generation; the ``{caption}`` placeholder is
+# filled in by ``text2story`` via ``str.format``.
 _PROMPT_TMPL = (
     "Write a funny and warm children's story (50-100 words) for ages 3-10, "
     "fully and strictly based on this scene: {caption}\nStory:"
 )
+# Module-level cache: both stay ``None`` until ``_get_model_and_tokenizer``
+# loads them on first use.
+_tokenizer = None
+_model = None
+def _get_model_and_tokenizer():
+    """Return the cached ``(tokenizer, model)`` pair for BLOOM-560M.
+
+    On the first call (or whenever either cached object is still ``None``)
+    both are loaded from the ``bigscience/bloom-560m`` checkpoint and the
+    model is moved to CUDA when a GPU is available. Later calls reuse the
+    module-level objects.
+    """
+    global _tokenizer, _model
+    # Fast path: everything is already loaded.
+    if _tokenizer is not None and _model is not None:
+        return _tokenizer, _model
+    checkpoint = "bigscience/bloom-560m"
+    _tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+    _model = AutoModelForCausalLM.from_pretrained(checkpoint)
+    if torch.cuda.is_available():
+        _model = _model.to("cuda")
+    return _tokenizer, _model
 def _dedup_sentences(text: str) -> str:
 def text2story(caption: str) -> str:
     """
+    Generate a ≤100-word children’s story from the image caption using BLOOM-560M.
+
+    The caption is inserted into ``_PROMPT_TMPL``, a continuation is sampled
+    from the cached model, and the continuation is sentence-deduplicated,
+    capped at 100 words and given closing punctuation if it lacks one.
+
     Args:
         caption: scene description string.
+
+    Returns:
         Story text (plain string, ≤100 words, no exact duplicate sentences).
     """
     prompt = _PROMPT_TMPL.format(caption=caption)
+    tokenizer, model = _get_model_and_tokenizer()
+    # Tokenize, then move every tensor to the device the model actually lives
+    # on, so inputs and weights can never end up on different devices.
+    inputs = tokenizer(prompt, return_tensors="pt")
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}
+    prompt_len = inputs["input_ids"].shape[1]
+    # Generate text; unpacking ``inputs`` also forwards the attention mask.
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=150,
+        do_sample=True,
+        top_p=0.9,
+        temperature=0.8,
+        no_repeat_ngram_size=4,    # Block 4-gram repeats
+        repetition_penalty=1.15,   # Soften copy-loops
+        pad_token_id=tokenizer.eos_token_id
+    )
+    # Decode only the newly generated tokens. Slicing the decoded string by
+    # ``len(prompt)`` is fragile: decoding may not reproduce the prompt
+    # character-for-character, which would clip or leak text at the boundary.
+    story = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
+    # Deduplicate sentences
+    story = _dedup_sentences(story)
+    # Hard cap at 100 words first, so the punctuation check below sees the
+    # text that is actually returned.
+    story = " ".join(story.split()[:100])
+    # Ensure ending punctuation
+    if story and story[-1] not in ".!?":
+        story += "."
+    return story
 # Step 3. Text to Audio