roemmele committed
Commit c886682 · 1 Parent(s): 9d0d6e1

Enabled control of generation parameters; created README.md
README.md ADDED
@@ -0,0 +1,85 @@
+ # Creative Help
+
+ This is the model repo for Creative Help, a legacy app for AI-based writing assistance powered by an RNN language model. It was developed in 2016 as one of the first demonstrations of using a language model to help people write stories. For more information, see the following research papers:
+
+ [Automated Assistance for Creative Writing with an RNN Language Model.](https://roemmele.github.io/publications/creative-help-demo.pdf) Melissa Roemmele and Andrew Gordon. Demo at IUI 2018.
+
+ [Linguistic Features of Helpfulness in Automated Support for Creative Writing.](https://roemmele.github.io/publications/creative-help-evaluation.pdf) Melissa Roemmele and Andrew Gordon. Storytelling Workshop at NAACL 2018.
+
+ ## Installation
+
+ ```bash
+ pip install transformers torch spacy
+ python -m spacy download en_core_web_sm
+ ```
+
+ ## Loading the Model
+
+ This model uses a custom architecture and tokenizer (semi-automatically adapted from the original implementation [here](https://github.com/roemmele/narrative-prediction)). Load it with `trust_remote_code=True`:
+
+ ```python
+ from transformers import AutoConfig, AutoModelForCausalLM
+ from rnnlm_model import (
+     RNNLMConfig,
+     RNNLMForCausalLM,
+     RNNLMTokenizer,
+     RNNLMTextGenerationPipeline,
+ )
+
+ AutoConfig.register("rnnlm", RNNLMConfig)
+ AutoModelForCausalLM.register(RNNLMConfig, RNNLMForCausalLM)
+
+ model = AutoModelForCausalLM.from_pretrained(
+     "path/to/model",
+     trust_remote_code=True,
+ )
+ tokenizer = RNNLMTokenizer.from_pretrained("path/to/model")
+ pipe = RNNLMTextGenerationPipeline(model=model, tokenizer=tokenizer)
+ ```
+
+ ## Usage Examples
+
+ Generation uses a base configuration of `max_new_tokens=50`, `do_sample=True`, and `temperature=1.0` unless overridden.
+
+ ### Basic Generation (Default Parameters)
+
+ ```python
+ output = pipe("The storm came", max_new_tokens=50, do_sample=True, temperature=1.0)
+ print(output[0]["generated_text"])
+ ```
+
+ ### Limiting by Sentences (`max_new_sents`)
+
+ Limit the decoded output to a specific number of sentences:
+
+ ```python
+ # At most 1 sentence
+ output = pipe(
+     "Sarah closed her laptop and stared out the window.",
+     max_new_tokens=50,
+     max_new_sents=1,
+ )
+ print(output[0]["generated_text"])
+ ```
+
+ ## Inference API
+
+ When using the Hugging Face Inference API or Inference Endpoints, pass parameters in the request body:
+
+ ```json
+ {
+     "inputs": "The storm came",
+     "parameters": {
+         "max_new_tokens": 50,
+         "do_sample": true,
+         "temperature": 1.0,
+         "max_new_sents": 2
+     }
+ }
+ ```
+
+ ## Test Script
+
+ ```bash
+ python test_model.py --model_path . --seed 0
+ ```
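The "unless overridden" behavior of the README's base configuration amounts to a dict merge: per-call parameters take precedence over the base generation settings. A minimal sketch of that idea (the parameter names come from the README; `effective_params` is an illustrative helper, not part of the repo):

```python
# Base generation settings from the README; per-call kwargs win on conflict.
BASE_GENERATION = {"max_new_tokens": 50, "do_sample": True, "temperature": 1.0}

def effective_params(**overrides):
    # Later keys in a dict merge override earlier ones, so overrides win.
    return {**BASE_GENERATION, **overrides}

print(effective_params(temperature=0.3, max_new_sents=1))
# {'max_new_tokens': 50, 'do_sample': True, 'temperature': 0.3, 'max_new_sents': 1}
```

So a call like `pipe(prompt, temperature=0.3)` samples with the lower temperature while keeping the default token budget.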
rnnlm_model/pipeline_rnnlm.py CHANGED
@@ -5,6 +5,13 @@ from transformers.pipelines.text_generation import TextGenerationPipeline
  from transformers.pipelines.text_generation import ReturnType
  from transformers import GenerationConfig
 
+ # Decode parameters from RNNLMTokenizer.decode() that users can control via the pipeline
+ DECODE_PARAM_NAMES = frozenset({
+     "begin_sentence", "skip_special_tokens", "clean_up_tokenization_spaces",
+     "ents", "adapt_ents", "detokenize", "capitalize_ents",
+     "max_new_sents", "eos_tokens",
+ })
+
 
  class RNNLMTextGenerationPipeline(TextGenerationPipeline):
      """
@@ -14,6 +21,18 @@ class RNNLMTextGenerationPipeline(TextGenerationPipeline):
 
      When the tokenizer has generalize_ents=True, entities are extracted from the
      prompt and used to replace ENT_PERSON_0, ENT_GPE_0, etc. in the generated output.
+
+     Decode parameters (from RNNLMTokenizer.decode) can be controlled when calling the
+     pipeline. Pass any of these as kwargs to override defaults:
+     - begin_sentence (bool): Whether generated text starts a new sentence
+     - skip_special_tokens (bool): Skip special tokens in output
+     - clean_up_tokenization_spaces (bool): Clean up extra spaces
+     - detokenize (bool): Apply detokenization (capitalization, punctuation)
+     - adapt_ents (bool): Replace ENT_* tokens with entities from context
+     - capitalize_ents (bool): Capitalize adapted entity names
+     - max_new_sents (int): Maximum number of sentences to include in decoded output
+     - eos_tokens (list): Token IDs treated as end-of-sequence
+     - ents (dict or list): Custom entity mapping(s) for adaptation
      """
      assistant_model = None  # Class default for transformers compatibility (assisted decoding)
      assistant_tokenizer = None
@@ -28,17 +47,28 @@ class RNNLMTextGenerationPipeline(TextGenerationPipeline):
          if not hasattr(self, "generation_config") or self.generation_config is None:
              self.generation_config = GenerationConfig(
                  pad_token_id=getattr(self.tokenizer, "pad_token_id", 0),
-                 max_new_tokens=256,
+                 max_new_tokens=50,
                  do_sample=True,
-                 temperature=0.7,
+                 temperature=1.0,
              )
 
+     def _sanitize_parameters(self, **kwargs):
+         """Extract RNNLM decode parameters into postprocess_params so users can control them."""
+         # Pull out decode params before passing to parent (they would otherwise go to forward/generate)
+         decode_params = {k: kwargs.pop(k)
+                          for k in list(kwargs.keys()) if k in DECODE_PARAM_NAMES}
+         preprocess_params, forward_params, postprocess_params = (
+             super()._sanitize_parameters(**kwargs))
+         postprocess_params["decode_params"] = decode_params
+         return preprocess_params, forward_params, postprocess_params
+
      def postprocess(
          self,
          model_outputs,
          return_type=ReturnType.NEW_TEXT,
          clean_up_tokenization_spaces=False,
          continue_final_message=None,
+         decode_params=None,
      ):
          generated_sequence = model_outputs["generated_sequence"][0]
          input_ids = model_outputs["input_ids"]
@@ -94,6 +124,14 @@ class RNNLMTextGenerationPipeline(TextGenerationPipeline):
              decode_kw.update(
                  adapt_ents=True, capitalize_ents=True, ents=[ents])
 
+         # Apply user-provided decode params (from pipeline call)
+         user_decode = decode_params or {}
+         for k, v in user_decode.items():
+             if k == "ents":
+                 decode_kw["ents"] = [v] if isinstance(v, dict) else v
+             else:
+                 decode_kw[k] = v
+
          # Decode only the generated token IDs, then append to saved prompt
          prompt_len = 0
          if input_ids is not None:
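The kwarg routing done by `_sanitize_parameters` can be sketched in isolation: decode-time parameters are popped out of the call kwargs so that only generation parameters are forwarded to `generate()`. The name set below mirrors `DECODE_PARAM_NAMES` from the diff; the `split_decode_kwargs` helper itself is illustrative, not the actual pipeline method.

```python
# Mirrors the frozenset added in the diff above.
DECODE_PARAM_NAMES = frozenset({
    "begin_sentence", "skip_special_tokens", "clean_up_tokenization_spaces",
    "ents", "adapt_ents", "detokenize", "capitalize_ents",
    "max_new_sents", "eos_tokens",
})

def split_decode_kwargs(kwargs):
    # Pop decode params out of the call kwargs; whatever remains is what
    # _sanitize_parameters hands on to the parent pipeline (and generate()).
    decode_params = {k: kwargs.pop(k)
                     for k in list(kwargs.keys()) if k in DECODE_PARAM_NAMES}
    return decode_params, kwargs

decode, gen = split_decode_kwargs({"max_new_tokens": 50, "max_new_sents": 1})
print(decode)  # {'max_new_sents': 1}
print(gen)     # {'max_new_tokens': 50}
```

Without this split, `max_new_sents` would reach `generate()`, which does not recognize it, and raise an error.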
rnnlm_model/tokenization_rnnlm.py CHANGED
@@ -153,14 +153,17 @@ class RNNLMTokenizer(PreTrainedTokenizer):
          adapt_ents=True,
          detokenize=True,
          capitalize_ents=True,
-         n_sents_per_seq=1,
+         max_new_sents=None,
          eos_tokens=None,
          **kwargs,
      ):
          """Decode token IDs to string. When adapt_ents=True and ents is provided,
          replaces generic ENT_* tokens in the output with entities from the input context.
          ents should be a list of dicts (one per sequence) mapping entity name to type
-         (e.g. {"John": "PERSON_0"} from number_ents(get_ents(...)))."""
+         (e.g. {"John": "PERSON_0"} from number_ents(get_ents(...))).
+         When max_new_sents is None, output length is determined by the token sequence
+         (i.e. by max_new_tokens from generation); when set, output is truncated to that
+         many sentences."""
          if isinstance(token_ids[0], (list, tuple)):
              seqs = token_ids
          else:
@@ -177,7 +180,7 @@
              self._lexicon_lookup,
              self.unk_token,
              seqs,
-             n_sents_per_seq=n_sents_per_seq,
+             max_new_sents=max_new_sents,
              eos_tokens=eos_tokens or [],
              detokenize=detokenize,
              ents=ents or [],
rnnlm_model/tokenization_utils.py CHANGED
@@ -120,7 +120,7 @@ def replace_ents_in_seq(encoder, seq):
      return seq
 
 
- def decode_num_seqs(encoder, lexicon_lookup, unk_word, seqs, n_sents_per_seq=None, eos_tokens=[],
+ def decode_num_seqs(encoder, lexicon_lookup, unk_word, seqs, max_new_sents=None, eos_tokens=[],
                      detokenize=False, ents=[], capitalize_ents=False, adapt_ents=False,
                      sub_ent_probs=None, begin_sentence=True):
      if not seqs:
@@ -184,8 +184,8 @@ def decode_num_seqs(encoder, lexicon_lookup, unk_word, seqs, max_new_sents=None
          seq = " ".join(seq)
          if eos_tokens:  # truncate the generated sequence at the first end-of-sequence token
              seq = filter_gen_seq(encoder, seq, eos_tokens=eos_tokens)
-         elif n_sents_per_seq:
-             seq = filter_gen_seq(encoder, seq, n_sents=n_sents_per_seq)
+         elif max_new_sents:  # otherwise keep only the first max_new_sents sentences
+             seq = filter_gen_seq(encoder, seq, n_sents=max_new_sents)
          decoded_seqs.append(seq)
      return decoded_seqs
 
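The truncation logic in `decode_num_seqs` has two mutually exclusive modes: an explicit `eos_tokens` cut is applied first, and `max_new_sents` only applies when no end-of-sequence tokens are given. A self-contained sketch of that precedence, operating on a space-joined token sequence like the decoded `seq` above (`truncate_generated` is a hypothetical stand-in for `filter_gen_seq`, not repo code):

```python
def truncate_generated(seq, eos_tokens=(), max_new_sents=None):
    # Hypothetical stand-in for filter_gen_seq's two modes: cut at the first
    # end-of-sequence token if any are given; otherwise keep tokens up to the
    # max_new_sents-th sentence-final punctuation mark.
    tokens = seq.split()
    if eos_tokens:
        for i, tok in enumerate(tokens):
            if tok in eos_tokens:
                return " ".join(tokens[:i])
    elif max_new_sents:
        seen = 0
        for i, tok in enumerate(tokens):
            if tok in {".", "!", "?"}:
                seen += 1
                if seen == max_new_sents:
                    return " ".join(tokens[: i + 1])
    return seq

print(truncate_generated("she ran . he fell . they stopped .", max_new_sents=2))
# she ran . he fell .
```

Note that when both are supplied, `eos_tokens` wins and `max_new_sents` is ignored, matching the `if/elif` in the diff.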
test_model.py ADDED
@@ -0,0 +1,136 @@
+ #!/usr/bin/env python3
+
+ import argparse
+ import json
+ import os
+ import random
+ import sys
+
+
+ def set_seed(seed: int):
+     """Set random seeds for reproducibility."""
+     random.seed(seed)
+     try:
+         import numpy as np
+         np.random.seed(seed)
+     except ImportError:
+         pass
+     try:
+         import torch
+         torch.manual_seed(seed)
+         if torch.cuda.is_available():
+             torch.cuda.manual_seed_all(seed)
+     except ImportError:
+         pass
+
+
+ def main():
+     parser = argparse.ArgumentParser()
+     parser.add_argument(
+         "--model_path", "-m", default=".", help="Path to converted model")
+     parser.add_argument(
+         "--prompts", "-p", default=None,
+         help="Path to JSON file with list of prompt strings (default: test_prompts.json next to this script)")
+     parser.add_argument(
+         "--seed", "-s", type=int, default=0,
+         help="Random seed for reproducible generation (default: 0)")
+     parser.add_argument(
+         "--max_new_tokens", type=int, default=None,
+         help="Max tokens to generate (default: 50)")
+     parser.add_argument(
+         "--max_new_sents", type=int, default=None,
+         help="Max sentences in decoded output (default: pipeline default)")
+     args = parser.parse_args()
+
+     if args.seed is not None:
+         set_seed(args.seed)
+         print(f"Random seed set to {args.seed} for reproducibility")
+
+     if not os.path.isdir(args.model_path):
+         print(f"Error: Model path {args.model_path} does not exist.")
+         sys.exit(1)
+
+     prompts_path = args.prompts
+     if prompts_path is None:
+         prompts_path = os.path.join(os.path.dirname(
+             os.path.abspath(__file__)), "test_prompts.json")
+     if not os.path.isfile(prompts_path):
+         print(f"Error: Prompts file {prompts_path} does not exist.")
+         sys.exit(1)
+
+     print("Loading model and tokenizer...")
+     from transformers import AutoConfig, AutoModelForCausalLM
+
+     # Register custom model and load tokenizer directly (AutoTokenizer doesn't know RNNLMTokenizer)
+     model_path = os.path.abspath(args.model_path)
+     from rnnlm_model import (
+         RNNLMConfig,
+         RNNLMForCausalLM,
+         RNNLMTokenizer,
+         RNNLMTextGenerationPipeline,
+     )
+     AutoConfig.register("rnnlm", RNNLMConfig)
+     AutoModelForCausalLM.register(RNNLMConfig, RNNLMForCausalLM)
+
+     model = AutoModelForCausalLM.from_pretrained(
+         model_path, trust_remote_code=True)
+     tokenizer = RNNLMTokenizer.from_pretrained(model_path)
+
+     print("Creating RNNLMTextGenerationPipeline (with entity adaptation)...")
+     pipe = RNNLMTextGenerationPipeline(
+         model=model,
+         tokenizer=tokenizer,
+     )
+
+     with open(prompts_path) as f:
+         test_prompts = json.load(f)
+
+     base_kwargs = dict(
+         max_new_tokens=args.max_new_tokens if args.max_new_tokens is not None else 50,
+         do_sample=True,
+         temperature=1.0,
+         pad_token_id=tokenizer.pad_token_id,
+     )
+     if args.max_new_sents is not None:
+         base_kwargs["max_new_sents"] = args.max_new_sents
+
+     def run_tests(kwargs):
+         for i, prompt in enumerate(test_prompts):
+             print(f"\n [{i + 1}/{len(test_prompts)}]")
+             print(f" PROMPT: ``{prompt}``")
+             output = pipe(prompt, **kwargs)
+             print(f" GENERATED: ``{output[0]['generated_text']}``")
+
+     # Test 1: Basic generation with default params
+     print("\n--- Test 1: Basic generation (default params) ---")
+     run_tests(base_kwargs)
+
+     # Test 2: max_new_tokens=20
+     print("\n--- Test 2: max_new_tokens=20 ---")
+     short_kwargs = {**base_kwargs, "max_new_tokens": 20}
+     run_tests(short_kwargs)
+
+     # Test 3: max_new_sents=2
+     print("\n--- Test 3: max_new_sents=2 ---")
+     sents_kwargs = {**base_kwargs, "max_new_sents": 2}
+     run_tests(sents_kwargs)
+
+     # Test 4: max_new_sents=1
+     print("\n--- Test 4: max_new_sents=1 ---")
+     sents1_kwargs = {**base_kwargs, "max_new_sents": 1}
+     run_tests(sents1_kwargs)
+
+     # Test 5: do_sample=False (greedy decoding)
+     print("\n--- Test 5: do_sample=False ---")
+     greedy_kwargs = {**base_kwargs, "do_sample": False}
+     run_tests(greedy_kwargs)
+
+     # Test 6: temperature=0.3
+     print("\n--- Test 6: temperature=0.3 ---")
+     low_temp_kwargs = {**base_kwargs, "temperature": 0.3}
+     run_tests(low_temp_kwargs)
+
+
+ if __name__ == "__main__":
+     main()
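The `--seed` option works because `set_seed` pins every random number generator the pipeline might draw from, so repeated runs make identical sampling decisions. A minimal illustration using only the stdlib RNG (the script also seeds numpy and torch when available):

```python
import random

def sample_run(seed):
    # Re-seeding before each run makes the draws deterministic.
    random.seed(seed)
    return [random.randint(0, 9) for _ in range(5)]

print(sample_run(0) == sample_run(0))  # True: identical draws for the same seed
```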
test_prompts.json ADDED
@@ -0,0 +1,12 @@
+ [
+     "She",
+     "The old",
+     "The storm came",
+     "Marcus opened",
+     "The door creaked",
+     "Sarah closed her laptop and stared out the window. The email from her editor had been clear: the manuscript needed major revisions, and she had two weeks.",
+     "The detective studied the crime scene photos spread across his desk. Three victims, three different cities, and one impossible connection that made no sense.",
+     "The ancient library had been sealed for centuries, but the earthquake had cracked the stone. Now dust motes danced in the first light it had seen in",
+     "When the power went out across the city, nobody panicked at first.\nIt was only when the lights stayed off for the second day that people began to worry",
+     "Marcus and Elena walked through the forbidden forest, their torches raised and hearts pounding.\n\nThey had heard rumors of something dark moving among the trees."
+ ]