Update README.md
Browse files
README.md
CHANGED
|
@@ -70,10 +70,9 @@ GPT-124M is a lightweight generative language model fine-tuned on the `fineweb-e
|
|
| 70 |
- **Validation Dataset:** 100 million tokens of HuggingFaceFW/fineweb-edu
|
| 71 |
|
| 72 |
## Usage
|
| 73 |
-
|
| 74 |
-
### Direct Use
|
| 75 |
You can use this model for text generation using the `transformers` library.
|
| 76 |
|
|
|
|
| 77 |
```python
|
| 78 |
# Import necessary modules from transformers
|
| 79 |
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
|
|
@@ -81,14 +80,42 @@ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
|
|
| 81 |
# Load tokenizer and model
|
| 82 |
model_name = "samkeet/GPT_124M"
|
| 83 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
| 84 |
-
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
|
| 85 |
|
| 86 |
# Create text generation pipeline
|
| 87 |
-
pipe = pipeline("text-generation", model=
|
| 88 |
|
| 89 |
# Generate text
|
| 90 |
result = pipe("Earth revolves around the", do_sample=True, max_length=40, temperature=0.9, top_p=0.5, top_k=50)
|
| 91 |
-
print(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
```
|
| 93 |
|
| 94 |
### Fine-tuning & Downstream Use
|
|
@@ -169,4 +196,4 @@ If you use this model, please cite:
|
|
| 169 |
```
|
| 170 |
|
| 171 |
## Contact
|
| 172 |
-
For inquiries, contact [Samkeet Sangai](https://www.linkedin.com/in/samkeet-sangai/).
|
|
|
|
| 70 |
- **Validation Dataset:** 100 million tokens of HuggingFaceFW/fineweb-edu
|
| 71 |
|
| 72 |
## Usage
|
|
|
|
|
|
|
| 73 |
You can use this model for text generation using the `transformers` library.
|
| 74 |
|
| 75 |
+
### Method 1: Using Pipeline
|
| 76 |
```python
|
| 77 |
# Import necessary modules from transformers
|
| 78 |
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
|
|
|
|
| 80 |
# Load tokenizer and model
|
| 81 |
model_name = "samkeet/GPT_124M"
|
| 82 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
|
|
|
| 83 |
|
| 84 |
# Create text generation pipeline
|
| 85 |
+
pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer, trust_remote_code=True, device="cpu")
|
| 86 |
|
| 87 |
# Generate text
|
| 88 |
result = pipe("Earth revolves around the", do_sample=True, max_length=40, temperature=0.9, top_p=0.5, top_k=50)
|
| 89 |
+
print("Pipeline Output:", result)
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
### Method 2: Direct Generation
|
| 93 |
+
```python
|
| 94 |
+
# Import necessary libraries
|
| 95 |
+
import torch
|
| 96 |
+
|
| 97 |
+
# Function for direct tokenization and text generation
|
| 98 |
+
def generate_text(input_text, device):
|
| 99 |
+
tokens = tokenizer.encode(input_text, return_tensors='pt').to(device)
|
| 100 |
+
model.to(device)
|
| 101 |
+
|
| 102 |
+
# Generate output
|
| 103 |
+
output = model.generate(
|
| 104 |
+
tokens,
|
| 105 |
+
do_sample=True,
|
| 106 |
+
max_length=40,
|
| 107 |
+
temperature=0.9,
|
| 108 |
+
top_p=0.5,
|
| 109 |
+
top_k=50,
|
| 110 |
+
)
|
| 111 |
+
|
| 112 |
+
# Decode generated text
|
| 113 |
+
generated_sentence = tokenizer.decode(output[0], skip_special_tokens=True)
|
| 114 |
+
return generated_sentence
|
| 115 |
+
|
| 116 |
+
# Generate text
|
| 117 |
+
input_text = "Earth revolves around the"
|
| 118 |
+
print("Direct Model Output:", generate_text(input_text, device="cpu"))
|
| 119 |
```
|
| 120 |
|
| 121 |
### Fine-tuning & Downstream Use
|
|
|
|
| 196 |
```
|
| 197 |
|
| 198 |
## Contact
|
| 199 |
+
For inquiries, contact [Samkeet Sangai](https://www.linkedin.com/in/samkeet-sangai/).
|