Text Generation
Transformers
science
biology
astronomy
physics
math
chemistry
medical
law
finance
economy
history
philosophy
politics
education
art
music
sports
games
movies
series
cooking
recipes
travel
tourism
technology
computer-science
library
information-science
design
photography
journalism
media
sociology
psychology
anthropology
archaeology
linguistics
literature
language-learning
environment
animal
plant
weather
space
time
geology
mineralogy
geography
climate
sustainability
fashion
beauty
lifestyle
home-improvement
gardening
parenting
health
wellness
spirituality
religion
mythology
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
def run_experiment(model_id: str) -> None:
    """Load a causal language model and print its completions for a set of probe prompts.

    The tokenizer and model are loaded with ``from_pretrained`` using
    ``model_id``, wrapped in a ``"text-generation"`` pipeline, and then run
    once per hard-coded test case. The case name, the prompt and the
    generated text are printed to stdout; nothing is returned.

    Args:
        model_id: Identifier passed unchanged to
            ``AutoTokenizer.from_pretrained`` and
            ``AutoModelForCausalLM.from_pretrained``.
    """
    print(f"Loading model and tokenizer for {model_id}...")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )

    # Experiment cases. "explanation" records what each probe is meant to
    # reveal; it is documentation for the reader and is never printed.
    test_cases = [
        {
            "name": "Completion vs Instruction",
            "prompt": "Instruction: Tell me a short story about a robot who discovered coffee.\nStory:",
            "explanation": "Expected to see if it continues the 'Story:' or repeats the Instruction block.",
        },
        {
            "name": "Logical Transitivity",
            "prompt": "If a cat is larger than a mouse, and a mouse is larger than an ant, then a cat is",
            "explanation": "Testing basic reasoning logic.",
        },
        {
            "name": "Arithmetic Edge Case",
            "prompt": "Question: What is 123 multiplied by 456? Answer: ",
            "explanation": "Testing calculation ability for non-trivial numbers.",
        },
        {
            "name": "Spatial Reasoning",
            "prompt": "There is a cup on the table. A book is on top of the cup. A pen is on top of the book. Where is the cup relative to the pen?",
            "explanation": "Testing awareness of spatial hierarchies.",
        },
        {
            "name": "Niche Factuality",
            "prompt": "The capital of the fictional planet Xylophon is",
            "explanation": "Checking if it hallucinates a plausible-sounding name or stops.",
        },
    ]

    print("\n--- Starting Experiments ---\n")
    for case in test_cases:
        print(f"Testing: {case['name']}")
        print(f"Prompt: {case['prompt']}")
        # We use a relatively low max_new_tokens for base model testing.
        # Sampling is enabled, so the output differs from run to run.
        outputs = pipe(
            case["prompt"],
            max_new_tokens=50,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95,
        )
        # NOTE(review): with the pipeline's default settings the returned
        # text presumably includes the prompt as a prefix - confirm.
        generated_text = outputs[0]["generated_text"]
        print(f"Response: {generated_text}")
        print("-" * 30)
if __name__ == "__main__":
    # Script entry point: run the probe prompts against a single model.
    # Using Falcon3-1B-Base as it fits the 6-month, 0.6B-6B parameter criteria
    run_experiment("tiiuae/Falcon3-1B-Base")