Assignment_1_Testing

Sleeping

App Files Community

LinkLinkWu committed on Apr 30, 2025

Commit

8ac7c32

verified ·

1 Parent(s): 01ae9b7

Update func.py

Browse files

Files changed (1) hide show

func.py +36 -38

func.py CHANGED Viewed

@@ -32,59 +32,57 @@ def img2text(img: Union[Image.Image, str, Path]) -> str:
         img = Image.open(img)
     return _get_captioner()(img)[0]["generated_text"]
-# Step2. Text Generation (Based on Caption)
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-_MODEL_NAME = "aspis/gpt2-genre-story-generation"
-_PROMPT     = (
     "Write a funny and warm children's story (50-100 words) for ages 3-10, "
     "fully and strictly based on this scene: {caption}\nStory:"
 )
-_tokenizer, _model = None, None
-def _load_story_model():
-    """Lazy-load tokenizer / model once."""
-    global _tokenizer, _model
-    if _model is None:
-        _tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
-        _model     = AutoModelForCausalLM.from_pretrained(_MODEL_NAME)
-        if torch.cuda.is_available():
-            _model = _model.to("cuda")
-    return _tokenizer, _model
 def text2story(caption: str) -> str:
     """
-    Generate a 50-100-word children’s story from an image caption.
     Args:
-        caption: Scene description string.
     Returns:
-        Story text (≤100 words).
     """
-    tok, mdl = _load_story_model()
-    prompt  = _PROMPT.format(caption=caption)
-    inputs  = tok(prompt, return_tensors="pt", add_special_tokens=False)
-    if mdl.device.type == "cuda":
-        inputs = {k: v.to("cuda") for k, v in inputs.items()}
-    gen_ids = mdl.generate(
-        **inputs,
-        max_new_tokens=150,
-        do_sample=True,
-        top_p=0.9,
-        temperature=0.8,
-        pad_token_id=tok.eos_token_id,
-        repetition_penalty=1.1
-    )[0]
-    # drop prompt, decode, keep ≤100 words, end at last period
-    story_ids = gen_ids[inputs["input_ids"].shape[-1]:]
-    story     = tok.decode(story_ids, skip_special_tokens=True).strip()
-    story     = story[: story.rfind(".") + 1] if "." in story else story
     return " ".join(story.split()[:100])
 # Step3. Text to Audio

         img = Image.open(img)
     return _get_captioner()(img)[0]["generated_text"]
+# Step 2.  Caption  ➜  Children’s story   (DeepSeek-R1 1.5 B)
+# -------------------------------------------------------------------
 import torch
+from transformers import pipeline
+_GEN_MODEL   = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
+_PROMPT_TMPL = (
     "Write a funny and warm children's story (50-100 words) for ages 3-10, "
     "fully and strictly based on this scene: {caption}\nStory:"
 )
+_generator = None
+def _get_generator():
+    """Lazy-load DeepSeek generator once (GPU if available)."""
+    global _generator
+    if _generator is None:
+        _generator = pipeline(
+            "text-generation",
+            model=_GEN_MODEL,
+            device=0 if torch.cuda.is_available() else -1,
+            # common decoding params – can still be overridden in the call
+            max_new_tokens=150,
+            do_sample=True,
+            top_p=0.9,
+            temperature=0.8,
+        )
+    return _generator
 def text2story(caption: str) -> str:
     """
+    Generate a ≤100-word children’s story from the image caption.
     Args:
+        caption: scene description string.
     Returns:
+        Story text (plain string, trimmed to ≤100 words).
     """
+    prompt   = _PROMPT_TMPL.format(caption=caption)
+    gen      = _get_generator()(
+        prompt,
+        return_full_text=False   # only the completion, not the prompt
+    )[0]["generated_text"]
+    # ensure last sentence is closed
+    story = gen.strip()
+    if "." in story:
+        story = story[: story.rfind(".") + 1]
+    # hard cap at 100 words
     return " ".join(story.split()[:100])
 # Step3. Text to Audio