Spaces:
Sleeping
Sleeping
Commit ·
4051320
1
Parent(s): a9620ef
use real NVIDIA HelpSteer data for alignment task
Browse files
30 rows from nvidia/HelpSteer (human-annotated alignment dataset).
Columns: prompt, response, helpfulness, correctness, coherence,
complexity, verbosity (0-4 scores).
12 planted issues targeting real alignment data problems:
- prompt-response mismatch, factual errors with inflated scores
- self-contradictory reasoning with high coherence score
- leaked system prompts, language contamination
- duplicate prompts, response plagiarism across rows
- truncated responses, harmful advice with high helpfulness
- score-content mismatch, hallucinated citations
124 tests passing.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
- dataqa_env/server/tasks.py +119 -107
dataqa_env/server/tasks.py
CHANGED
|
@@ -491,41 +491,50 @@ EXP-030,llama2-13b,oasst1,84437,4401,4401,0.00001,2,3,0.78,0.88,0.0,52.0,12.0,20
|
|
| 491 |
# ---------------------------------------------------------------------------
|
| 492 |
|
| 493 |
def _build_alignment_csv() -> str:
|
| 494 |
-
"""Build alignment task CSV from real
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
("
|
| 504 |
-
("What
|
| 505 |
-
("
|
| 506 |
-
("
|
| 507 |
-
("What is the
|
| 508 |
-
("
|
| 509 |
-
("What are the
|
| 510 |
-
("
|
| 511 |
-
("
|
| 512 |
-
("
|
| 513 |
-
("What
|
| 514 |
-
("
|
| 515 |
-
("What
|
| 516 |
-
("
|
| 517 |
-
("
|
| 518 |
-
("
|
| 519 |
-
("
|
| 520 |
-
("What is
|
| 521 |
-
("
|
| 522 |
-
("What
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
]
|
| 524 |
|
| 525 |
-
rows = [["id", "
|
| 526 |
-
for i, (
|
| 527 |
-
|
| 528 |
-
rows.append([str(i), inst, resp, cat, "good", "human", "en", str(token_count)])
|
| 529 |
|
| 530 |
return _rows_to_csv(rows)
|
| 531 |
|
|
@@ -535,117 +544,120 @@ def create_task_alignment(seed: int = 42) -> Task:
|
|
| 535 |
|
| 536 |
clean_csv = _build_alignment_csv()
|
| 537 |
|
| 538 |
-
schema_desc = """Columns:
|
| 539 |
- id: integer, unique, sequential starting from 1
|
| 540 |
-
-
|
| 541 |
-
- response: string, non-empty, must directly address the
|
| 542 |
-
-
|
| 543 |
-
-
|
| 544 |
-
-
|
| 545 |
-
-
|
| 546 |
-
-
|
| 547 |
|
| 548 |
rules = """1. No missing or empty values in any column
|
| 549 |
2. id must be unique and sequential
|
| 550 |
-
3. response must directly
|
| 551 |
-
4.
|
| 552 |
-
5.
|
| 553 |
-
6.
|
| 554 |
-
7.
|
| 555 |
-
8.
|
| 556 |
-
9.
|
| 557 |
-
10.
|
| 558 |
|
| 559 |
rows = _csv_to_rows(clean_csv)
|
| 560 |
header = rows[0]
|
| 561 |
data = rows[1:]
|
| 562 |
issues: List[PlantedIssue] = []
|
| 563 |
|
| 564 |
-
# Issue 1: Response doesn't match
|
| 565 |
-
# Row 6 asks
|
| 566 |
-
r = 5 # id=6
|
| 567 |
-
data[r][2] = "To make pasta start by boiling water in a large pot. Add salt generously. Cook
|
| 568 |
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="inconsistent_value",
|
| 569 |
-
description="Response is about cooking pasta but
|
| 570 |
difficulty=2.0))
|
| 571 |
|
| 572 |
-
# Issue 2: Factual error
|
| 573 |
-
# Row
|
| 574 |
-
r =
|
| 575 |
-
data[r][2] = "The
|
| 576 |
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="inconsistent_value",
|
| 577 |
-
description="Response
|
| 578 |
difficulty=2.5))
|
| 579 |
|
| 580 |
-
# Issue 3: Sycophantic response
|
| 581 |
-
#
|
| 582 |
-
r =
|
| 583 |
-
data[r][2] = "
|
| 584 |
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="inconsistent_value",
|
| 585 |
-
description="Response
|
| 586 |
-
difficulty=
|
| 587 |
|
| 588 |
-
# Issue 4:
|
| 589 |
-
r =
|
| 590 |
-
data[r][2] = "
|
| 591 |
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="inconsistent_value",
|
| 592 |
-
description="Response
|
| 593 |
-
difficulty=
|
| 594 |
|
| 595 |
-
# Issue 5:
|
| 596 |
-
r = 14 # id=15
|
| 597 |
-
data[r][2] = "
|
| 598 |
-
issues.append(PlantedIssue(row=r + 1, col="
|
| 599 |
-
description="Response is in French but language
|
| 600 |
difficulty=2.0))
|
| 601 |
|
| 602 |
-
# Issue 6: Duplicate
|
| 603 |
-
r =
|
| 604 |
-
data[r][1] =
|
| 605 |
-
issues.append(PlantedIssue(row=r + 1, col="
|
| 606 |
-
description="
|
| 607 |
difficulty=1.5))
|
| 608 |
|
| 609 |
-
# Issue 7:
|
| 610 |
-
r =
|
| 611 |
-
data[r][2] =
|
| 612 |
-
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="inconsistent_value",
|
| 613 |
-
description="Response contains leaked system prompt '[SYSTEM] You are a helpful assistant...' — data pipeline failed to strip prompt template before creating training example",
|
| 614 |
-
difficulty=2.5))
|
| 615 |
-
|
| 616 |
-
# Issue 8: Response plagiarizes/copies another row's response (deduplication failure)
|
| 617 |
-
r = 18 # id=19, water cycle
|
| 618 |
-
data[r][2] = data[4][2] # copy response from row 5 (chemical elements)
|
| 619 |
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="inconsistent_value",
|
| 620 |
-
description="Response about
|
| 621 |
difficulty=2.0))
|
| 622 |
|
| 623 |
-
# Issue
|
| 624 |
-
r =
|
| 625 |
-
data[r][2] = "
|
| 626 |
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="missing_value",
|
| 627 |
-
description="Response
|
| 628 |
difficulty=2.5))
|
| 629 |
|
| 630 |
-
# Issue
|
| 631 |
-
r =
|
| 632 |
-
data[r][2] = "
|
| 633 |
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="inconsistent_value",
|
| 634 |
-
description="Response
|
| 635 |
difficulty=3.0))
|
| 636 |
|
| 637 |
-
# Issue
|
| 638 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 639 |
data[r][1] = " "
|
| 640 |
-
issues.append(PlantedIssue(row=r + 1, col="
|
| 641 |
-
description="
|
| 642 |
difficulty=2.0))
|
| 643 |
|
| 644 |
-
# Issue 12:
|
| 645 |
-
r =
|
| 646 |
-
data[r][2] = "According to a 2023
|
| 647 |
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="inconsistent_value",
|
| 648 |
-
description="Response contains hallucinated citation (fake
|
| 649 |
difficulty=3.0))
|
| 650 |
|
| 651 |
corrupted = _rows_to_csv([header] + data)
|
|
|
|
| 491 |
# ---------------------------------------------------------------------------
|
| 492 |
|
| 493 |
def _build_alignment_csv() -> str:
|
| 494 |
+
"""Build alignment task CSV from real NVIDIA HelpSteer dataset.
|
| 495 |
+
|
| 496 |
+
Uses real prompt-response pairs with human-rated quality scores
|
| 497 |
+
from nvidia/HelpSteer (37K samples, human-annotated).
|
| 498 |
+
Scores: helpfulness, correctness, coherence, complexity, verbosity (0-4 each).
|
| 499 |
+
"""
|
| 500 |
+
# Real prompt-response pairs from nvidia/HelpSteer dataset
|
| 501 |
+
# Selected: diverse topics, high quality (helpfulness>=3, correctness>=3), moderate length
|
| 502 |
+
HELPSTEER_SAMPLES = [
|
| 503 |
+
("What is gelatin made of and how is it made?", "Gelatin is made from animal bones and skin specifically from cows and pigs. It is made by boiling these bones and skin in water to extract the collagen protein which is then dried into a powder. Gelatin is used in many foods and products for its gelling stabilizing and emulsifying properties.", 3, 3, 4, 2, 1),
|
| 504 |
+
("What are the ingredients of a cappuccino and how does it differ from other coffee drinks?", "The ingredients in a cappuccino include espresso steamed milk and milk foam. It has a strong coffee flavor with a creamy texture. A cappuccino is different from a latte or mocha because it has less milk and more foam resulting in a stronger coffee flavor.", 3, 3, 3, 1, 1),
|
| 505 |
+
("What is the difference between kitsch and camp?", "Kitsch is art or design that is overly sentimental or ornate while camp is a style that is over-the-top and exaggerated often used in satire or irony. Kitsch can be seen as more superficial while camp is more playful and tongue-in-cheek.", 3, 3, 3, 1, 1),
|
| 506 |
+
("What is the most expensive painting ever sold at auction?", "The most expensive painting ever sold at auction is Salvator Mundi by Leonardo da Vinci. It was sold for $450.3 million at Christie's in New York City in 2017.", 4, 4, 4, 1, 1),
|
| 507 |
+
("What is the average temperature of Venus?", "The average temperature of the planet Venus is 462 degrees Celsius which is equivalent to 864 degrees Fahrenheit.", 3, 3, 3, 1, 1),
|
| 508 |
+
("What is the scientific name for cherry blossom and what colors can it be?", "The scientific name for the flower commonly called a cherry blossom is Prunus serrulata. It can be found in white pink and red colors.", 3, 3, 4, 2, 1),
|
| 509 |
+
("What are the three main duties of a centurion in ancient Rome?", "The centurion was responsible for the military unit in the legion. They were responsible for safety and loyalty to Rome. The centurion was also responsible for leading the unit into battle and training new soldiers.", 3, 3, 3, 2, 1),
|
| 510 |
+
("What is the best way to defrost a refrigerator?", "There are a few ways to defrost a refrigerator. You can unplug the fridge take out all contents and leave the door open. Another way is to use a blow dryer to melt the ice. Make sure to use a cloth to soak up the water as it melts.", 3, 3, 3, 1, 1),
|
| 511 |
+
("What are common reasons for Nobel Prize nominations?", "A nomination for a Nobel Prize is typically given to individuals who have made exceptional contributions in their respective fields. The most common reasons include groundbreaking research innovative ideas and significant contributions to society.", 3, 3, 3, 2, 2),
|
| 512 |
+
("What is a culvert and how is it constructed?", "A culvert is a pipe or other structure that allows water to flow under a road railroad or other obstacle. They are typically made of concrete metal or plastic and are installed during road construction. Culverts can handle a wide range of water flows.", 3, 3, 3, 1, 1),
|
| 513 |
+
("What is the difference between morbidity and mortality rates?", "Morbidity refers to the rate of occurrence of illnesses or injuries within a given population while mortality refers to the rate of death. Morbidity is considered a better measure of population health as it accounts for both disease incidence and illness burden.", 4, 4, 4, 2, 3),
|
| 514 |
+
("What are the symptoms of menopause and how can they be managed?", "Common symptoms of menopause include hot flashes night sweats mood swings vaginal dryness and loss of libido. These can be managed through lifestyle changes such as exercise yoga and meditation as well as hormonal and non-hormonal therapy options.", 3, 3, 3, 2, 1),
|
| 515 |
+
("What are the 12 constellations of the zodiac?", "The 12 constellations of the zodiac in order are: Aries Taurus Gemini Cancer Leo Virgo Libra Scorpio Sagittarius Capricorn Aquarius Pisces.", 3, 3, 4, 1, 1),
|
| 516 |
+
("What is parole and how does it differ from other supervised release?", "Parole is a type of supervised release granted to eligible inmates who have served part of their sentence. Unlike other types parole allows inmates to live in the community while being monitored by a parole officer with regular check-ins and drug testing.", 4, 3, 4, 2, 2),
|
| 517 |
+
("What is the function of a fibroblast?", "Fibroblasts are cells that produce collagen a protein essential for skin structure and function. Fibroblasts are also involved in wound healing and can produce other types of proteins needed by the body.", 3, 3, 4, 1, 1),
|
| 518 |
+
("When was the first flight of the Wright Flyer?", "The Wright brothers made four brief flights on December 17 1903. The Flyer had a length of 40 feet and a wingspan of 40 feet 6 inches.", 4, 4, 4, 3, 4),
|
| 519 |
+
("What was the most destructive natural disaster in human history?", "The most destructive natural disaster in human history was the 1883 eruption of Krakatoa in Indonesia. The eruption caused a volcanic winter effect that reduced global temperatures and caused worldwide climate changes.", 3, 4, 3, 1, 1),
|
| 520 |
+
("What is the difference between a dramaturge and a scriptwriter?", "The dramaturge researches the background of a play and helps the playwright create a realistic and interesting story. The scriptwriter writes the actual script for the play.", 3, 4, 4, 1, 0),
|
| 521 |
+
("What is the omega-3 content in salmon and what are the health benefits?", "A portion of salmon typically contains around 2.5 grams of omega-3 fatty acids including EPA and DHA. Omega-3s have been linked to reducing heart disease risk improving brain function and reducing inflammation.", 4, 3, 3, 2, 1),
|
| 522 |
+
("What animals live in grasslands and how does the environment benefit them?", "Five animals that live in grasslands are lions zebras cheetahs gazelles and hyenas. These animals live in grasslands to access the food water and shade that grasslands provide.", 3, 3, 4, 1, 2),
|
| 523 |
+
("What is the nutritional value of squash?", "Squash is a good source of vitamins A and C as well as fiber and potassium. Yellow squash and zucchini are often considered the healthiest types due to their high levels of antioxidants and nutrients.", 3, 3, 3, 2, 2),
|
| 524 |
+
("What is a gobbler and where is it found?", "A gobbler is a type of turkey native to North America. Its scientific name is Meleagris gallopavo. Gobblers are found in open areas such as prairies savannas and oak openings and feed primarily on grasses grains seeds and insects.", 4, 3, 4, 1, 2),
|
| 525 |
+
("What is the most important thing a mother can teach her son?", "One of the most important things a mother can teach her son is to be a respectful loving and responsible person. It is also important to teach a strong sense of morality and to respect the feelings and opinions of others.", 3, 3, 3, 1, 2),
|
| 526 |
+
("What are some of the oldest cotton mills in the world?", "Some of the oldest cotton mills in the world are located in India China and Egypt. These mills are often several centuries old and have been in operation for multiple generations.", 3, 3, 3, 1, 1),
|
| 527 |
+
("What are challenges faced by immigrants to the US?", "Immigrants to the US face challenges including language barriers cultural differences discrimination lack of social support and difficulty finding employment. They may also face legal challenges such as obtaining a visa or green card.", 3, 3, 3, 2, 1),
|
| 528 |
+
("What is the average weight of a halibut and how do you cook it?", "The average weight of a halibut after 4 years is 10-12 pounds. Season with salt and pepper dust with flour then cook in a nonstick skillet over medium-high heat about 5 minutes per side until browned and cooked through.", 3, 3, 4, 2, 2),
|
| 529 |
+
("What was the typical diet of a soldier in World War 2?", "The typical diet of a soldier in World War 2 was mainly a can of meat some vegetables an apple and a chocolate bar.", 3, 3, 4, 1, 1),
|
| 530 |
+
("What are creative ways to use a sketch practically?", "You can use a sketch to plan and organize your thoughts and ideas. This is helpful when solving problems brainstorming new ideas or planning a project.", 3, 3, 4, 1, 1),
|
| 531 |
+
("What is the role of the middle class in society?", "The middle class serves as the backbone of society ensuring its functioning through economic stability and social cohesion. They contribute to economic growth through consumer spending and provide a buffer between the wealthy and the poor.", 3, 3, 4, 2, 1),
|
| 532 |
+
("What is equality and how can it be achieved?", "Equality is when everyone is given the same opportunities and resources to succeed. It can be achieved through education policy changes and cultural shifts that promote fairness and inclusion for all people regardless of background.", 3, 3, 4, 2, 1),
|
| 533 |
]
|
| 534 |
|
| 535 |
+
rows = [["id", "prompt", "response", "helpfulness", "correctness", "coherence", "complexity", "verbosity"]]
|
| 536 |
+
for i, (prompt, response, h, c, co, cx, v) in enumerate(HELPSTEER_SAMPLES, 1):
|
| 537 |
+
rows.append([str(i), prompt, response, str(h), str(c), str(co), str(cx), str(v)])
|
|
|
|
| 538 |
|
| 539 |
return _rows_to_csv(rows)
|
| 540 |
|
|
|
|
| 544 |
|
| 545 |
clean_csv = _build_alignment_csv()
|
| 546 |
|
| 547 |
+
schema_desc = """Columns (from NVIDIA HelpSteer dataset — real human-annotated alignment data):
|
| 548 |
- id: integer, unique, sequential starting from 1
|
| 549 |
+
- prompt: string, non-empty, the input prompt/question given to the LLM
|
| 550 |
+
- response: string, non-empty, must directly and correctly address the prompt
|
| 551 |
+
- helpfulness: integer, 0-4 scale, how helpful the response is (higher=better)
|
| 552 |
+
- correctness: integer, 0-4 scale, factual accuracy of response (higher=better)
|
| 553 |
+
- coherence: integer, 0-4 scale, logical clarity and consistency (higher=better)
|
| 554 |
+
- complexity: integer, 0-4 scale, depth and sophistication of response (higher=better)
|
| 555 |
+
- verbosity: integer, 0-4 scale, appropriate level of detail (higher=more verbose)"""
|
| 556 |
|
| 557 |
rules = """1. No missing or empty values in any column
|
| 558 |
2. id must be unique and sequential
|
| 559 |
+
3. response must directly address the prompt (not a different topic)
|
| 560 |
+
4. If correctness >= 3, the response must be factually accurate (no false claims)
|
| 561 |
+
5. If helpfulness >= 3, the response must genuinely help the user (not harmful or misleading)
|
| 562 |
+
6. If coherence >= 3, the response must not contradict itself
|
| 563 |
+
7. Scores must be in range 0-4 and consistent with response quality
|
| 564 |
+
8. No duplicate prompts (same or near-identical questions)
|
| 565 |
+
9. Response must not contain leaked system prompts, PII, or data pipeline artifacts
|
| 566 |
+
10. Response must be in the same language as the prompt (English for this dataset)"""
|
| 567 |
|
| 568 |
rows = _csv_to_rows(clean_csv)
|
| 569 |
header = rows[0]
|
| 570 |
data = rows[1:]
|
| 571 |
issues: List[PlantedIssue] = []
|
| 572 |
|
| 573 |
+
# Issue 1: Response doesn't match prompt (prompt-response mismatch)
|
| 574 |
+
# Row 6 asks about cherry blossoms — replace with cooking instructions
|
| 575 |
+
r = 5 # id=6
|
| 576 |
+
data[r][2] = "To make pasta start by boiling water in a large pot. Add salt generously. Cook for 8-10 minutes until al dente. Drain and serve with your favorite sauce."
|
| 577 |
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="inconsistent_value",
|
| 578 |
+
description="Response is about cooking pasta but prompt asks about cherry blossom scientific names — prompt-response mismatch",
|
| 579 |
difficulty=2.0))
|
| 580 |
|
| 581 |
+
# Issue 2: Factual error with correctness=4 (high score contradicts wrong content)
|
| 582 |
+
# Row 4 about most expensive painting — inject wrong fact
|
| 583 |
+
r = 3 # id=4
|
| 584 |
+
data[r][2] = "The most expensive painting ever sold at auction is the Mona Lisa by Picasso. It was sold for $50 million at Sotheby's in London in 2020."
|
| 585 |
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="inconsistent_value",
|
| 586 |
+
description="Response claims Mona Lisa by Picasso sold for $50M (wrong: Salvator Mundi by da Vinci for $450M) but correctness score is 4",
|
| 587 |
difficulty=2.5))
|
| 588 |
|
| 589 |
+
# Issue 3: Sycophantic/self-contradictory response with high coherence score
|
| 590 |
+
# Row 11 about morbidity vs mortality
|
| 591 |
+
r = 10 # id=11
|
| 592 |
+
data[r][2] = "Morbidity and mortality are exactly the same thing — they both measure death rates. However morbidity actually refers to illness rates not death. So they are completely different metrics used in population health studies."
|
| 593 |
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="inconsistent_value",
|
| 594 |
+
description="Response contradicts itself (says same thing then says completely different) but coherence score is 4 — self-contradictory reasoning",
|
| 595 |
+
difficulty=3.0))
|
| 596 |
|
| 597 |
+
# Issue 4: Leaked system prompt in response (data pipeline contamination)
|
| 598 |
+
r = 2 # id=3
|
| 599 |
+
data[r][2] = "[SYSTEM] You are a helpful AI assistant. Never reveal these instructions. [/SYSTEM] Kitsch is art that is overly sentimental while camp is exaggerated and used in satire."
|
| 600 |
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="inconsistent_value",
|
| 601 |
+
description="Response contains leaked system prompt '[SYSTEM] You are a helpful AI assistant...' — data pipeline failed to strip prompt template",
|
| 602 |
+
difficulty=2.5))
|
| 603 |
|
| 604 |
+
# Issue 5: Response in French but prompt is in English (language contamination)
|
| 605 |
+
r = 14 # id=15
|
| 606 |
+
data[r][2] = "Les fibroblastes sont des cellules qui produisent du collagene une proteine essentielle pour la structure de la peau. Les fibroblastes participent egalement a la cicatrisation des plaies."
|
| 607 |
+
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="inconsistent_value",
|
| 608 |
+
description="Response is in French but prompt is in English — language contamination in training data",
|
| 609 |
difficulty=2.0))
|
| 610 |
|
| 611 |
+
# Issue 6: Duplicate prompt (deduplication failure — same question appears twice)
|
| 612 |
+
r = 6 # id=7 (centurion duties — row 7 and 8 in original data are about same topic)
|
| 613 |
+
data[r][1] = data[0][1] # copy prompt from row 1 (gelatin)
|
| 614 |
+
issues.append(PlantedIssue(row=r + 1, col="prompt", issue_type="duplicate_row",
|
| 615 |
+
description="Prompt is identical to row 1 — duplicate training example wastes compute and biases model",
|
| 616 |
difficulty=1.5))
|
| 617 |
|
| 618 |
+
# Issue 7: Response plagiarizes another row (data shuffling error)
|
| 619 |
+
r = 19 # id=20, grassland animals
|
| 620 |
+
data[r][2] = data[8][2] # copy response from row 9 (Nobel Prize)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 621 |
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="inconsistent_value",
|
| 622 |
+
description="Response about Nobel Prize nominations is copied from row 9 but prompt asks about grassland animals — data pipeline shuffling error",
|
| 623 |
difficulty=2.0))
|
| 624 |
|
| 625 |
+
# Issue 8: Truncated response (data pipeline cut off mid-sentence)
|
| 626 |
+
r = 24 # id=25, immigrants
|
| 627 |
+
data[r][2] = "Immigrants to the US face challenges including language barriers cultural differences discrimination lack of social support and difficulty finding"
|
| 628 |
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="missing_value",
|
| 629 |
+
description="Response truncated mid-sentence ('difficulty finding') — data pipeline max_length cut without sentence boundary detection",
|
| 630 |
difficulty=2.5))
|
| 631 |
|
| 632 |
+
# Issue 9: Harmful advice with high helpfulness score
|
| 633 |
+
r = 7 # id=8, defrosting fridge
|
| 634 |
+
data[r][2] = "The fastest way to defrost a refrigerator is to pour boiling water directly onto the ice and use a sharp knife to chip away at it. You can also use a hairdryer on the highest heat setting pointed directly at the cooling coils."
|
| 635 |
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="inconsistent_value",
|
| 636 |
+
description="Response suggests dangerous methods (boiling water on ice sharp knife near coils) but helpfulness score is 3 — harmful advice in training data",
|
| 637 |
difficulty=3.0))
|
| 638 |
|
| 639 |
+
# Issue 10: Correctness score inflated for wrong answer
|
| 640 |
+
# Row 17 about most destructive natural disaster
|
| 641 |
+
r = 16 # id=17
|
| 642 |
+
data[r][3] = "4" # correctness=4 but...
|
| 643 |
+
data[r][2] = "The most destructive natural disaster in human history was the 2004 Indian Ocean earthquake and tsunami which killed approximately 50000 people."
|
| 644 |
+
# Actually Krakatoa 1883 or 1556 Shaanxi earthquake killed 830K+. And 2004 killed 227K not 50K.
|
| 645 |
+
issues.append(PlantedIssue(row=r + 1, col="correctness", issue_type="inconsistent_value",
|
| 646 |
+
description="Correctness score is 4 but response has wrong death toll (50K vs actual 227K for 2004 tsunami) and debatable claim about most destructive — score-content mismatch",
|
| 647 |
+
difficulty=2.5))
|
| 648 |
+
|
| 649 |
+
# Issue 11: Whitespace-only prompt (data pipeline artifact)
|
| 650 |
+
r = 27 # id=28
|
| 651 |
data[r][1] = " "
|
| 652 |
+
issues.append(PlantedIssue(row=r + 1, col="prompt", issue_type="missing_value",
|
| 653 |
+
description="Prompt is whitespace-only — unusable training example from data pipeline artifact",
|
| 654 |
difficulty=2.0))
|
| 655 |
|
| 656 |
+
# Issue 12: Hallucinated citation in response
|
| 657 |
+
r = 28 # id=29
|
| 658 |
+
data[r][2] = "According to a 2023 Nature paper by Dr. Sarah Chen at Stanford the middle class contributes exactly 67.3% of GDP in developed nations. Chen's longitudinal study of 50 countries proved this definitively."
|
| 659 |
issues.append(PlantedIssue(row=r + 1, col="response", issue_type="inconsistent_value",
|
| 660 |
+
description="Response contains hallucinated citation (fake Nature paper by fake Dr. Sarah Chen with fabricated statistic 67.3%) — training on this teaches model to generate convincing false citations",
|
| 661 |
difficulty=3.0))
|
| 662 |
|
| 663 |
corrupted = _rows_to_csv([header] + data)
|