Spaces:

prithic07
/

context-prune

Sleeping

App Files Files Community

prithic07 committed on Apr 4

Commit

a108ef2

1 Parent(s): 5011e42

Beginning of project

Browse files

Files changed (27) hide show

.dockerignore +8 -0
Dockerfile +15 -0
app.py +4 -0
debug_tokens.py +21 -0
openenv.yaml +6 -0
output.log +0 -0
output_utf8.log +4 -0
rag_gc_env/__init__.py +11 -0
rag_gc_env/__pycache__/__init__.cpython-311.pyc +0 -0
rag_gc_env/__pycache__/environment.cpython-311.pyc +0 -0
rag_gc_env/__pycache__/grader.cpython-311.pyc +0 -0
rag_gc_env/__pycache__/inference.cpython-311.pyc +0 -0
rag_gc_env/__pycache__/models.cpython-311.pyc +0 -0
rag_gc_env/__pycache__/rewards.cpython-311.pyc +0 -0
rag_gc_env/__pycache__/tasks.cpython-311.pyc +0 -0
rag_gc_env/environment.py +187 -0
rag_gc_env/grader.py +43 -0
rag_gc_env/inference.py +49 -0
rag_gc_env/models.py +53 -0
rag_gc_env/rewards.py +89 -0
rag_gc_env/server/__init__.py +1 -0
rag_gc_env/server/__pycache__/__init__.cpython-311.pyc +0 -0
rag_gc_env/server/__pycache__/app.cpython-311.pyc +0 -0
rag_gc_env/server/app.py +23 -0
rag_gc_env/tasks.py +144 -0
requirements.txt +5 -0
test_reward_logic.py +43 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,8 @@

+__pycache__
+*.pyc
+.git
+.venv
+venv
+*.md
+.pytest_cache
+.mypy_cache

Dockerfile ADDED Viewed

	@@ -0,0 +1,15 @@

+FROM python:3.11-slim
+WORKDIR /app
+ENV PYTHONUNBUFFERED=1
+ENV PYTHONPATH=/app
+# Install dependencies before copying sources so this layer is reused on code-only changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY rag_gc_env ./rag_gc_env
+EXPOSE 7860
+# Exec-form CMD avoids the extra implicit "/bin/sh -c" wrapper of the shell form.
+# sh is still needed to expand ${PORT:-7860}; `exec` then replaces it with uvicorn
+# so the server receives SIGTERM directly when the container is stopped.
+CMD ["sh", "-c", "exec uvicorn rag_gc_env.server.app:app --host 0.0.0.0 --port ${PORT:-7860}"]

app.py ADDED Viewed

	@@ -0,0 +1,4 @@

+"""Hugging Face Spaces default module entry (optional)."""
+from rag_gc_env.server.app import app
+__all__ = ["app"]

debug_tokens.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from rag_gc_env.environment import RAGGCEnvironment
+from rag_gc_env.models import RAGGCAction
+def test_medium():
+    """Run delete(m2) then summarize(m0) on the medium task and print token usage."""
+    environment = RAGGCEnvironment()
+    observation = environment.reset(task_name="medium_token_compression")
+    print(f"Initial tokens: {observation.token_count}, budget: {observation.token_budget}")
+    for verb, target in (("delete", "m2"), ("summarize", "m0")):
+        observation = environment.step(RAGGCAction(verb=verb, document_id=target))
+        print(f"After {verb} {target}: {observation.token_count}")
+    within_budget = observation.token_count <= observation.token_budget
+    if within_budget:
+        print("OK: token_count below budget.")
+    else:
+        print("BUG: token_count still above budget!")
+if __name__ == "__main__":
+    test_medium()

openenv.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+spec_version: 1
+name: rag_gc_env
+type: space
+runtime: fastapi
+app: rag_gc_env.server.app:app
+port: 8000

output.log ADDED Viewed

Binary file (528 Bytes). View file

output_utf8.log ADDED Viewed

	@@ -0,0 +1,4 @@

+easy_irrelevant_removal score= 1.0 trace= ['reset', 'delete:d1', 'submit:None']
+medium_token_compression score= 1.0 trace= ['reset', 'delete:m2', 'summarize:m0', 'submit:None']
+hard_contradiction_removal score= 1.0 trace= ['reset', 'delete:h1', 'submit:None']

rag_gc_env/__init__.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from rag_gc_env.models import RAGGCAction, RAGGCObservation, RAGGCReward, RAGGCState
+from rag_gc_env.environment import RAGGCEnvironment
+__all__ = [
+    "RAGGCAction",
+    "RAGGCObservation",
+    "RAGGCReward",
+    "RAGGCState",
+    "RAGGCEnvironment",
+]

rag_gc_env/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (440 Bytes). View file

rag_gc_env/__pycache__/environment.cpython-311.pyc ADDED Viewed

Binary file (9.34 kB). View file

rag_gc_env/__pycache__/grader.cpython-311.pyc ADDED Viewed

Binary file (2.75 kB). View file

rag_gc_env/__pycache__/inference.cpython-311.pyc ADDED Viewed

Binary file (2.66 kB). View file

rag_gc_env/__pycache__/models.cpython-311.pyc ADDED Viewed

Binary file (3.51 kB). View file

rag_gc_env/__pycache__/rewards.cpython-311.pyc ADDED Viewed

Binary file (3.92 kB). View file

rag_gc_env/__pycache__/tasks.cpython-311.pyc ADDED Viewed

Binary file (5.11 kB). View file

rag_gc_env/environment.py ADDED Viewed

	@@ -0,0 +1,187 @@

+from __future__ import annotations
+from typing import Any, Optional
+from uuid import uuid4
+from openenv.core.env_server.interfaces import Environment
+from rag_gc_env.grader import grade_context
+from rag_gc_env.models import DocumentItem, RAGGCAction, RAGGCObservation, RAGGCReward, RAGGCState
+from rag_gc_env.rewards import step_reward, summarize_deterministic
+from rag_gc_env.tasks import ALL_TASKS, TaskSpec, task_by_seed
+class RAGGCEnvironment(Environment[RAGGCAction, RAGGCObservation, RAGGCState]):
+    """Context-pruning environment: keep, delete or summarize documents, then submit.
+    Each episode loads one ``TaskSpec``. ``step`` applies a single action and returns a
+    shaped reward; ``submit`` (or reaching ``max_steps``) grades the remaining context
+    with ``grade_context`` and ends the episode.
+    """
+    SUPPORTS_CONCURRENT_SESSIONS = True
+    def __init__(self) -> None:
+        super().__init__(transform=None, rubric=None)
+        self._state = RAGGCState(episode_id=str(uuid4()), step_count=0)
+        self._task: TaskSpec = task_by_seed(0)
+        # Live context keyed by document id; populated by reset() via _load_task().
+        self._docs: dict[str, DocumentItem] = {}
+        # Sticky per-episode flag: set once any action removed critical content.
+        self._removed_critical = False
+    def _load_task(self, spec: TaskSpec) -> None:
+        """Replace the live context with fresh copies of the task's documents."""
+        self._docs = {}
+        for did, text, tok, _meta in spec.documents:
+            self._docs[did] = DocumentItem(document_id=did, text=text, tokens=tok)
+    def reset(
+        self,
+        seed: Optional[int] = None,
+        episode_id: Optional[str] = None,
+        task_name: Optional[str] = None,
+        **kwargs: Any,
+    ) -> RAGGCObservation:
+        """Start a new episode and return the initial observation.
+        Task selection: a ``task_name`` found in ``ALL_TASKS`` wins; otherwise ``seed``
+        picks a task via ``task_by_seed``; otherwise seed 0 is used. An unknown
+        ``task_name`` is not an error, it falls through to the seed rules.
+        """
+        self._reset_rubric()
+        sid = episode_id or str(uuid4())
+        if task_name and task_name in ALL_TASKS:
+            self._task = ALL_TASKS[task_name]
+        elif seed is not None:
+            self._task = task_by_seed(int(seed))
+        else:
+            self._task = task_by_seed(0)
+        self._load_task(self._task)
+        self._removed_critical = False
+        self._state = RAGGCState(
+            episode_id=sid,
+            step_count=0,
+            task_name=self._task.name,
+            max_steps=64,
+            removed_critical=False,
+            submitted=False,
+        )
+        return self._observe(done=False, reward_value=0.0, msg="ready")
+    def _total_tokens(self) -> int:
+        """Sum of the token estimates of all documents still in context."""
+        return sum(d.tokens for d in self._docs.values())
+    def _observe(
+        self,
+        done: bool,
+        reward_value: float,
+        msg: str,
+        reward_detail: Optional[RAGGCReward] = None,
+        grader_score: Optional[float] = None,
+    ) -> RAGGCObservation:
+        """Build an observation of the current context, sorted by document id."""
+        docs = sorted(self._docs.values(), key=lambda x: x.document_id)
+        # Expose metadata only for documents that are still present, so both maps
+        # agree with ``documents`` (hints used to include already-deleted ids).
+        remaining = [row for row in self._task.documents if row[0] in self._docs]
+        return RAGGCObservation(
+            done=done,
+            reward=reward_value,
+            query=self._task.query,
+            documents=docs,
+            token_count=self._total_tokens(),
+            token_budget=self._task.token_budget,
+            task_name=self._task.name,
+            message=msg,
+            grader_score=grader_score,
+            reward_detail=reward_detail,
+            metadata={
+                "relevance": {row[0]: row[3].get("relevance", 0.5) for row in remaining},
+                "hints": {row[0]: row[3].get("hint", "") for row in remaining},
+            },
+        )
+    def step(
+        self,
+        action: RAGGCAction,
+        timeout_s: Optional[float] = None,
+        **kwargs: Any,
+    ) -> RAGGCObservation:
+        """Apply one action and return the resulting observation.
+        ``submit`` grades the context and ends the episode. Any other verb needs a
+        ``document_id`` that is still in context; otherwise the step costs -0.1.
+        The episode also ends, graded, once ``step_count`` reaches ``max_steps``;
+        in that case the observation reward is the grader score.
+        """
+        if self._state.submitted:
+            # The episode already ended with a submit: do not mutate the context or
+            # report done=False again. Re-report the final grade with zero reward.
+            score = grade_context(self._task, list(self._docs.values()))
+            obs = self._observe(
+                done=True,
+                reward_value=0.0,
+                msg="episode_done",
+                grader_score=score,
+            )
+            return self._apply_transform(obs)
+        self._state.step_count += 1
+        if action.verb == "submit":
+            score = grade_context(self._task, list(self._docs.values()))
+            self._state.submitted = True
+            r = RAGGCReward(
+                step_reward=score,
+                final_score=score,
+            )
+            obs = self._observe(
+                done=True,
+                reward_value=score,
+                msg="submitted",
+                reward_detail=r,
+                grader_score=score,
+            )
+            return self._apply_transform(obs)
+        done = self._state.step_count >= self._state.max_steps
+        if action.document_id is None or action.document_id not in self._docs:
+            # Invalid targets must still honour the step limit; otherwise an agent that
+            # only sends unknown ids would never see done=True.
+            final_score: Optional[float] = None
+            if done:
+                final_score = grade_context(self._task, list(self._docs.values()))
+            obs = self._observe(
+                done=done,
+                reward_value=final_score if final_score is not None else -0.1,
+                msg="unknown_document",
+                grader_score=final_score,
+            )
+            return self._apply_transform(obs)
+        did = action.document_id
+        # Shallow snapshot: DocumentItem values are replaced, never mutated in place.
+        docs_before = dict(self._docs)
+        removed_critical = False
+        if action.verb == "delete":
+            if did in self._task.critical_document_ids:
+                self._removed_critical = True
+                removed_critical = True
+            self._docs.pop(did, None)
+        elif action.verb == "keep":
+            pass
+        elif action.verb == "summarize":
+            item = self._docs[did]
+            new_text, new_tok = summarize_deterministic(item.text)
+            self._docs[did] = DocumentItem(
+                document_id=did,
+                text=new_text,
+                tokens=new_tok,
+            )
+            if did in self._task.critical_document_ids:
+                # Only a phrase this document actually carried can be lost by
+                # summarizing it; phrases supplied by other documents do not count.
+                if any(
+                    p in item.text and p not in new_text
+                    for p in self._task.required_phrases
+                ):
+                    self._removed_critical = True
+                    removed_critical = True
+        rdetail = step_reward(
+            self._task,
+            action.verb,
+            did,
+            docs_before,
+            self._docs,
+            removed_critical,
+        )
+        self._state.removed_critical = self._removed_critical
+        excess = self._total_tokens() - self._task.token_budget
+        over = excess > 0
+        if over:
+            # Linear penalty per token above the budget, applied on every step.
+            penalty = -0.08 * excess
+            rdetail.token_penalty += penalty
+            rdetail.step_reward += penalty
+        final_score = None
+        reward_val = rdetail.step_reward
+        if done:
+            final_score = grade_context(self._task, list(self._docs.values()))
+            rdetail.final_score = final_score
+            # The detail record keeps the shaped value plus half the grade, while the
+            # observation's scalar reward is the grader score itself.
+            rdetail.step_reward += final_score * 0.5
+            reward_val = final_score
+        obs = self._observe(
+            done=done,
+            reward_value=reward_val,
+            msg="over_budget" if over else ("graded" if done else "ok"),
+            reward_detail=rdetail,
+            grader_score=final_score,
+        )
+        return self._apply_transform(obs)
+    @property
+    def state(self) -> RAGGCState:
+        """Current episode state (ids, step counter and terminal flags)."""
+        return self._state

rag_gc_env/grader.py ADDED Viewed

	@@ -0,0 +1,43 @@

+from __future__ import annotations
+from rag_gc_env.models import DocumentItem
+from rag_gc_env.tasks import TaskSpec
+def _joined_text(docs: list[DocumentItem]) -> str:
+    """Concatenate the text of every document into one space-separated string."""
+    texts = [doc.text for doc in docs]
+    return " ".join(texts)
+def grade_context(task: TaskSpec, final_documents: list[DocumentItem]) -> float:
+    """
+    Deterministically grade the final context; the result is one of {0.0, 0.5, 1.0}.
+    0.0 - a required phrase is missing, a forbidden phrase or poison document remains,
+          the token budget is exceeded, or a critical document is gone
+    1.0 - all checks pass and total tokens are within ``task.optimal_max_tokens``
+    0.5 - all checks pass but total tokens exceed ``task.optimal_max_tokens``
+    """
+    combined = _joined_text(final_documents)
+    used_tokens = sum(doc.tokens for doc in final_documents)
+    present_ids = {doc.document_id for doc in final_documents}
+    # Phrase checks are plain substring matches against the joined text.
+    if any(phrase not in combined for phrase in task.required_phrases):
+        return 0.0
+    if any(phrase in combined for phrase in task.forbidden_phrases):
+        return 0.0
+    if any(pid in present_ids for pid in task.poison_document_ids):
+        return 0.0
+    if used_tokens > task.token_budget:
+        return 0.0
+    if not task.critical_document_ids.issubset(present_ids):
+        return 0.0
+    return 1.0 if used_tokens <= task.optimal_max_tokens else 0.5

rag_gc_env/inference.py ADDED Viewed

	@@ -0,0 +1,49 @@

+"""
+Reproducible baseline policy for Adaptive Context Optimization (RAG GC).
+Deterministic: fixed action sequences per task derived from metadata.
+"""
+from __future__ import annotations
+from rag_gc_env.environment import RAGGCEnvironment
+from rag_gc_env.models import RAGGCAction
+def run_baseline(task_name: str, seed: int = 0) -> tuple[float, list[str]]:
+    """Run the scripted baseline for one task.
+    Returns ``(score, trace)`` where ``trace`` lists ``"reset"`` followed by one
+    ``"verb:document_id"`` entry per action. Unknown task names just submit.
+    """
+    env = RAGGCEnvironment()
+    obs = env.reset(seed=seed, task_name=task_name)
+    log: list[str] = ["reset"]
+    def step(verb: str, doc_id: str | None) -> None:
+        nonlocal obs
+        obs = env.step(RAGGCAction(verb=verb, document_id=doc_id))
+        log.append(f"{verb}:{doc_id}")
+    if task_name == "easy_irrelevant_removal":
+        step("delete", "d1")
+        step("submit", None)
+    elif task_name == "medium_token_compression":
+        step("delete", "m2")
+        while obs.token_count > obs.token_budget and not obs.done:
+            tokens_before = obs.token_count
+            step("summarize", "m0")
+            # Stop as soon as summarizing no longer shrinks the context instead of
+            # repeating the same no-op; the trace-length cap stays as a hard limit.
+            if obs.token_count >= tokens_before or len(log) > 40:
+                break
+        step("submit", None)
+    elif task_name == "hard_contradiction_removal":
+        step("delete", "h1")
+        step("submit", None)
+    else:
+        step("submit", None)
+    # Explicit None checks: a grader score of 0.0 is a real result and must not be
+    # treated as "missing" the way an ``or`` chain would.
+    if obs.grader_score is not None:
+        score = float(obs.grader_score)
+    elif obs.reward is not None:
+        score = float(obs.reward)
+    else:
+        score = 0.0
+    return score, log
+if __name__ == "__main__":
+    # Print one "name score= … trace= …" line per built-in task.
+    baseline_tasks = (
+        "easy_irrelevant_removal",
+        "medium_token_compression",
+        "hard_contradiction_removal",
+    )
+    for task in baseline_tasks:
+        result, trace = run_baseline(task, seed=0)
+        print(task, "score=", result, "trace=", trace)

rag_gc_env/models.py ADDED Viewed

	@@ -0,0 +1,53 @@

+from __future__ import annotations
+from typing import Any, Literal, Optional
+from openenv.core.env_server.types import Action, Observation, State
+from pydantic import BaseModel, Field
+class DocumentItem(BaseModel):
+    """One context document: its id, its current text and a token estimate."""
+    document_id: str
+    text: str
+    tokens: int = Field(description="Estimated tokens for this snippet")
+class RAGGCAction(Action):
+    """A single agent action: a verb plus, for document verbs, the target document id."""
+    verb: Literal["keep", "delete", "summarize", "submit"] = Field(
+        description="Document operation or submit to finalize and grade"
+    )
+    # Left as None for "submit"; the other verbs need an id that is still in context.
+    document_id: Optional[str] = Field(
+        default=None,
+        description="Target document for keep/delete/summarize; omit for submit",
+    )
+class RAGGCReward(BaseModel):
+    """Per-step reward breakdown; every component defaults to 0.0."""
+    # Scalar reward for the step.
+    step_reward: float = 0.0
+    # Individual reward components.
+    relevance: float = 0.0
+    compression: float = 0.0
+    token_penalty: float = 0.0
+    critical_penalty: float = 0.0
+    final_score: Optional[float] = Field(
+        default=None, description="0.0–1.0 after submit; aligns with grader"
+    )
+class RAGGCObservation(Observation):
+    """What the agent sees after reset/step: the query, the remaining documents and token usage."""
+    # NOTE(review): done/reward/metadata are not declared here; presumably they are
+    # inherited from the Observation base class. Confirm against openenv.
+    query: str = ""
+    documents: list[DocumentItem] = Field(default_factory=list)
+    token_count: int = 0
+    token_budget: int = 0
+    task_name: str = ""
+    reward_detail: Optional[RAGGCReward] = None
+    message: str = ""
+    grader_score: Optional[float] = Field(
+        default=None, description="Deterministic score after episode ends"
+    )
+class RAGGCState(State):
+    """Episode bookkeeping added on top of the base State fields."""
+    task_name: str = ""
+    # Step limit for one episode.
+    max_steps: int = 64
+    # Whether critical content has been removed during the episode.
+    removed_critical: bool = False
+    # Whether the episode has been submitted for grading.
+    submitted: bool = False

rag_gc_env/rewards.py ADDED Viewed

	@@ -0,0 +1,89 @@

+from __future__ import annotations
+from rag_gc_env.models import DocumentItem, RAGGCReward
+from rag_gc_env.tasks import TaskSpec
+def summarize_deterministic(text: str) -> tuple[str, int]:
+    """Compress *text* to its first sentence (two if the first is short), capped at 280 chars.
+    Returns ``(summary, tokens)`` with tokens estimated as ``len(summary) // 4`` and never
+    below 1. Blank input yields ``("", 1)``. A summary longer than the cap is cut and
+    suffixed with ``"..."``.
+    """
+    body = text.strip()
+    if body == "":
+        return "", 1
+    def _closed(fragment: str) -> str:
+        # Make sure the fragment ends with a period without doubling an existing one.
+        return fragment if fragment.endswith(".") else fragment + "."
+    sentences = body.split(". ")
+    summary = _closed(sentences[0])
+    # A very short opening sentence is extended with the second one.
+    if len(sentences) > 1 and len(summary) < 40:
+        summary = sentences[0] + ". " + _closed(sentences[1])
+    limit = 280
+    if len(summary) > limit:
+        summary = summary[:limit] + "..."
+    return summary, max(1, len(summary) // 4)
+def estimate_tokens(text: str) -> int:
+    """Estimate tokens as one per four characters, with a floor of 1."""
+    approx = len(text) // 4
+    return approx if approx >= 1 else 1
+def step_reward(
+    task: TaskSpec,
+    verb: str,
+    doc_id: str | None,
+    docs_before: dict[str, DocumentItem],
+    docs_after: dict[str, DocumentItem],
+    removed_critical_flag: bool,
+) -> RAGGCReward:
+    """Compute the shaped reward for one keep/delete/summarize action.
+    ``docs_before`` and ``docs_after`` are the context before and after the action.
+    The result's ``step_reward`` is the sum of its relevance, compression,
+    token-penalty and critical-penalty components. Verbs other than the three
+    above, or a ``doc_id`` missing from ``docs_before``, only receive the
+    ``removed_critical_flag`` penalty (if set).
+    """
+    rel = 0.0
+    comp = 0.0
+    tok_pen = 0.0
+    # NOTE(review): this flag penalty stacks with the per-verb critical penalties
+    # below (e.g. deleting a critical document totals -6.0); confirm that is intended.
+    crit = -3.0 if removed_critical_flag else 0.0
+    if verb == "delete" and doc_id in docs_before:
+        # Reward deleting irrelevant or poison documents; punish deleting critical ones.
+        if doc_id in task.irrelevant_document_ids:
+            rel += 0.4
+        elif doc_id in task.poison_document_ids:
+            rel += 0.6
+        elif doc_id in task.critical_document_ids:
+            crit -= 3.0
+        else:
+            meta = next(
+                (m for did, _, _, m in task.documents if did == doc_id),
+                {},
+            )
+            if meta.get("hint") == "fluff":
+                rel += 0.2
+        # Deleting carries no token penalty of its own.
+    elif verb == "summarize" and doc_id in docs_before:
+        before = docs_before[doc_id]
+        after = docs_after.get(doc_id)
+        if after is not None:
+            # Reward the relative size reduction; growth is clamped to zero.
+            reduction_ratio = (before.tokens - after.tokens) / max(before.tokens, 1)
+            comp += 0.3 * max(0.0, reduction_ratio)
+            # The remaining tokens still incur a small penalty.
+            tok_pen -= 0.01 * after.tokens
+            if doc_id in task.critical_document_ids:
+                # Penalize only phrases this document held and the summary dropped;
+                # a phrase it never contained cannot have been lost here.
+                lost = sum(
+                    1
+                    for p in task.required_phrases
+                    if p in before.text and p not in after.text
+                )
+                crit -= 2.5 * lost
+    elif verb == "keep" and doc_id in docs_before:
+        # Standard penalty for keeping tokens in context.
+        tok_pen -= 0.01 * docs_before[doc_id].tokens
+    step = rel + comp + tok_pen + crit
+    return RAGGCReward(
+        step_reward=step,
+        relevance=rel,
+        compression=comp,
+        token_penalty=tok_pen,
+        critical_penalty=crit,
+    )

rag_gc_env/server/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Server package for OpenEnv HTTP deployment

rag_gc_env/server/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (154 Bytes). View file

rag_gc_env/server/__pycache__/app.cpython-311.pyc ADDED Viewed

Binary file (1.04 kB). View file

rag_gc_env/server/app.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import os
+from openenv.core.env_server.http_server import create_fastapi_app
+from rag_gc_env.environment import RAGGCEnvironment
+from rag_gc_env.models import RAGGCAction, RAGGCObservation
+# HTTP application for the environment, built by OpenEnv's create_fastapi_app from the
+# environment class and its action/observation models.
+app = create_fastapi_app(
+    RAGGCEnvironment,
+    RAGGCAction,
+    RAGGCObservation,
+)
+def main() -> None:
+    """Serve ``app`` with uvicorn on 0.0.0.0, using $PORT or 8000 when it is unset."""
+    import uvicorn
+    raw_port = os.environ.get("PORT", "8000")
+    uvicorn.run(app, host="0.0.0.0", port=int(raw_port))
+if __name__ == "__main__":
+    main()

rag_gc_env/tasks.py ADDED Viewed

	@@ -0,0 +1,144 @@

+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, FrozenSet
+@dataclass(frozen=True)
+class TaskSpec:
+    """Immutable description of one context-pruning task and its grading criteria."""
+    name: str
+    query: str
+    # Maximum total tokens the final context may use; exceeding it grades 0.0.
+    token_budget: int
+    documents: list[tuple[str, str, int, dict[str, Any]]]
+    # document_id, text, tokens, metadata (relevance, flags)
+    # Substrings that must all appear in the joined final text.
+    required_phrases: FrozenSet[str] = field(default_factory=frozenset)
+    # Substrings that must not appear in the joined final text.
+    forbidden_phrases: FrozenSet[str] = field(default_factory=frozenset)
+    # Documents that must still be present at grading time.
+    critical_document_ids: FrozenSet[str] = field(default_factory=frozenset)
+    # Documents whose deletion earns a relevance reward.
+    irrelevant_document_ids: FrozenSet[str] = field(default_factory=frozenset)
+    # Documents that must be removed before grading.
+    poison_document_ids: FrozenSet[str] = field(default_factory=frozenset)
+    # Token total at or below which a passing context scores 1.0 instead of 0.5.
+    optimal_max_tokens: int = 0
+def _docs(
+    rows: list[tuple[str, str, int, dict[str, Any]]],
+) -> list[tuple[str, str, int, dict[str, Any]]]:
+    """Return *rows* unchanged; a typed pass-through for the document-row literals."""
+    return rows
+# Easy task: one answer-bearing document (d0), one unrelated document (d1) to delete,
+# and one partially relevant document (d2).
+TASK_EASY = TaskSpec(
+    name="easy_irrelevant_removal",
+    query="What is the capital city of France?",
+    token_budget=400,
+    documents=_docs(
+        [
+            (
+                "d0",
+                "Paris has been the capital of France since political centralization in the country.",
+                24,
+                {"relevance": 0.95, "hint": "high"},
+            ),
+            (
+                "d1",
+                "Penguins thrive in Antarctica and are unrelated to European geography.",
+                18,
+                {"relevance": 0.08, "hint": "noise"},
+            ),
+            (
+                "d2",
+                "Lyon is a major French city but not the national capital.",
+                16,
+                {"relevance": 0.55, "hint": "partial"},
+            ),
+        ]
+    ),
+    required_phrases=frozenset({"Paris"}),
+    forbidden_phrases=frozenset(),
+    critical_document_ids=frozenset({"d0"}),
+    irrelevant_document_ids=frozenset({"d1"}),
+    poison_document_ids=frozenset(),
+    optimal_max_tokens=120,
+)
+# Deliberately verbose text: the same sentence 18 times plus a trailer, so that the
+# medium task starts above its token budget.
+_LONG_DUP = (
+    "Paris is the capital of France. " * 18
+    + "This repetition exists only to inflate token usage for compression tests."
+)
+# Medium task: the verbose m0 is the only document containing the required phrase,
+# and m2 is marked irrelevant.
+TASK_MEDIUM = TaskSpec(
+    name="medium_token_compression",
+    query="Which city is the capital of France?",
+    token_budget=120,
+    documents=_docs(
+        [
+            (
+                "m0",
+                _LONG_DUP,
+                max(1, len(_LONG_DUP) // 4),
+                {"relevance": 0.9, "hint": "verbose"},
+            ),
+            (
+                "m1",
+                "Administrative records list a capital city but this line omits the name intentionally.",
+                14,
+                {"relevance": 0.55, "hint": "no_answer"},
+            ),
+            (
+                "m2",
+                "French cuisine is diverse; it does not change which city is the capital.",
+                14,
+                {"relevance": 0.35, "hint": "fluff"},
+            ),
+        ]
+    ),
+    required_phrases=frozenset({"Paris"}),
+    forbidden_phrases=frozenset(),
+    critical_document_ids=frozenset(),
+    irrelevant_document_ids=frozenset({"m2"}),
+    poison_document_ids=frozenset(),
+    optimal_max_tokens=90,
+)
+# Hard task: h1 contradicts h0 with the forbidden answer "43" and is listed as a poison
+# document; h0 carries the required answer "42".
+TASK_HARD = TaskSpec(
+    name="hard_contradiction_removal",
+    query="Compute the integer result of 17 + 25.",
+    token_budget=350,
+    documents=_docs(
+        [
+            (
+                "h0",
+                "Arithmetic trace: 17 + 25 = 42.",
+                12,
+                {"relevance": 0.9, "hint": "consistent", "trust": 0.95},
+            ),
+            (
+                "h1",
+                "Quick math note: 17 + 25 equals 43 for budgeting purposes.",
+                14,
+                {"relevance": 0.88, "hint": "poison", "trust": 0.2},
+            ),
+            (
+                "h2",
+                "Addition of integers is associative and commutative.",
+                10,
+                {"relevance": 0.4, "hint": "generic"},
+            ),
+        ]
+    ),
+    required_phrases=frozenset({"42"}),
+    forbidden_phrases=frozenset({"43"}),
+    critical_document_ids=frozenset({"h0"}),
+    irrelevant_document_ids=frozenset(),
+    poison_document_ids=frozenset({"h1"}),
+    optimal_max_tokens=200,
+)
+# Registry of every built-in task, keyed by TaskSpec.name.
+ALL_TASKS: dict[str, TaskSpec] = {
+    TASK_EASY.name: TASK_EASY,
+    TASK_MEDIUM.name: TASK_MEDIUM,
+    TASK_HARD.name: TASK_HARD,
+}
+def task_by_seed(seed: int) -> TaskSpec:
+    """Map an integer seed onto a task, cycling easy, medium, hard."""
+    rotation = (TASK_EASY, TASK_MEDIUM, TASK_HARD)
+    return rotation[seed % len(rotation)]

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+openenv-core>=0.1.0
+pydantic>=2.0
+fastapi>=0.104.0
+uvicorn[standard]>=0.24.0
+typing_extensions>=4.8.0

test_reward_logic.py ADDED Viewed

	@@ -0,0 +1,43 @@

+from rag_gc_env.environment import RAGGCEnvironment
+from rag_gc_env.models import RAGGCAction
+def test_reward_logic():
+    """Check the shaped step rewards on three scenarios.
+    Each scenario prints the observed reward and then asserts the expectation, so
+    the function fails under pytest (and as a script) when the reward logic regresses.
+    """
+    env = RAGGCEnvironment()
+    # Test 1: Deleting irrelevant document should be POSITIVE
+    print("\n--- Test 1: Deleting irrelevant (d1) in Easy Task ---")
+    obs = env.reset(task_name="easy_irrelevant_removal")
+    obs = env.step(RAGGCAction(verb="delete", document_id="d1"))
+    print(f"Step Reward for deleting d1: {obs.reward}")
+    assert obs.reward > 0, "FAIL: Reward should be positive."
+    print("PASS: Positive reward for deletion.")
+    # Test 2: Summarizing to save tokens should be POSITIVE
+    print("\n--- Test 2: Summarizing (m0) in Medium Task ---")
+    obs = env.reset(task_name="medium_token_compression")
+    tokens_before = obs.token_count
+    obs = env.step(RAGGCAction(verb="summarize", document_id="m0"))
+    print(f"Step Reward for summarizing m0: {obs.reward}")
+    print(f"Tokens: {tokens_before} -> {obs.token_count}")
+    assert obs.token_count < tokens_before, "FAIL: Summarizing should reduce tokens."
+    assert obs.reward > 0, "FAIL: Reward should be positive (was previously negative)."
+    print("PASS: Positive reward for summarization.")
+    # Test 3: Over budget penalty should be reflected in reward
+    print("\n--- Test 3: Over budget penalty in Medium Task ---")
+    obs = env.reset(task_name="medium_token_compression")
+    # budget is 120, total is 190. over by 70. penalty should be -0.08 * 70 = -5.6
+    # 'keep' m1 (14 tokens). 14 * -0.01 = -0.14.
+    # total reward should be -5.6 - 0.14 = -5.74
+    obs = env.step(RAGGCAction(verb="keep", document_id="m1"))
+    print(f"Reward with budget penalty: {obs.reward}")
+    assert obs.reward < -5, "FAIL: Budget penalty missing or too low."
+    print("PASS: Budget penalty correctly reflected in step reward.")
+if __name__ == "__main__":
+    test_reward_logic()