Spaces:

GalacticTriumvirate
/

Earning_lens

Sleeping

App Files Files Community

intelkishan committed 7 days ago

Commit

abbea70

1 Parent(s): 2a048ac

Implemented quarter prediction

Browse files

Files changed (9) hide show

client.py +4 -0
evaluate.py +2 -1
inference.py +40 -18
server/app.py +1 -1
server/earnings_analyst_environment.py +3 -1
tasks/__init__.py +4 -14
tasks/grading.py +29 -0
tasks/next_quarter_move/grader.py +24 -5
tasks/next_quarter_move/spec.py +24 -7

client.py CHANGED Viewed

@@ -53,6 +53,8 @@ class EarningsAnalystEnv(
         }
     def _parse_result(self, payload: Dict) -> StepResult[EarningsAnalystObservation]:
         """
         Parse server response into StepResult[EarningsAnalystObservation].
@@ -69,7 +71,9 @@ class EarningsAnalystEnv(
             task_instruction=obs_data.get("task_instruction", ""),
             done=payload.get("done", False),
             reward=payload.get("reward"),
             metadata=obs_data.get("metadata", {}),
         )
         return StepResult(

         }
     def _parse_result(self, payload: Dict) -> StepResult[EarningsAnalystObservation]:
         """
         Parse server response into StepResult[EarningsAnalystObservation].
             task_instruction=obs_data.get("task_instruction", ""),
             done=payload.get("done", False),
             reward=payload.get("reward"),
+            ground_truth=obs_data.get("ground_truth", ""),
             metadata=obs_data.get("metadata", {}),
         )
         return StepResult(

evaluate.py CHANGED Viewed

@@ -82,7 +82,8 @@ async def run_evaluation(
         episode_result = await run_episode(
             base_url=base_url,
             model=model,
-            verbose=False,
         )
         episode_reward = float(
             episode_result.reward if episode_result.reward is not None else 0.0

         episode_result = await run_episode(
             base_url=base_url,
             model=model,
+            verbose=True,
         )
         episode_reward = float(
             episode_result.reward if episode_result.reward is not None else 0.0

inference.py CHANGED Viewed

@@ -54,9 +54,12 @@ class EpisodeResult:
     model_response_text: str | None = None
-def _normalize_sentiment(model_text: str, valid: list[str] | None = None) -> str:
-    """Map model output to a canonical label; fallback to neutral."""
-    labels = valid or DEFAULT_LABELS
     normalized_model_text = str(model_text).strip().lower()
     for canonical_label in labels:
         if normalized_model_text == canonical_label.lower():
@@ -91,18 +94,14 @@ async def predict_with_openai(
     valid_labels: list[str] | None = None,
 ) -> tuple[str, str]:
     """
-    Example Chat Completions call returning a JSON object; maps to a canonical label.
-    Replace or parameterize this when you implement tasks beyond placeholder demos.
     """
-    labels = valid_labels or DEFAULT_LABELS
     user_content = build_user_content(obs)
     system_prompt = (
         "You are a financial analyst assistant. "
-        "Reply with a single JSON object only, no markdown or extra text, "
-        'with key "sentiment" whose value is exactly one of: '
-        + ", ".join(f'"{lab}"' for lab in labels)
-        + "."
     )
     completion = await client.chat.completions.create(
         model=model,
@@ -113,13 +112,24 @@ async def predict_with_openai(
         response_format={"type": "json_object"},
     )
     response_text = (completion.choices[0].message.content or "").strip()
-    predicted = "neutral"
     try:
         parsed: dict[str, Any] = json.loads(response_text)
-        if isinstance(parsed, dict) and "sentiment" in parsed:
-            predicted = _normalize_sentiment(str(parsed["sentiment"]), labels)
     except (json.JSONDecodeError, TypeError, ValueError):
-        predicted = _normalize_sentiment(response_text, labels)
     return predicted, response_text
@@ -147,18 +157,30 @@ async def run_episode(
     openai_client_options: dict[str, Any] = {"api_key": api_key}
     if resolved_openai_base_url:
         openai_client_options["base_url"] = resolved_openai_base_url
     client = AsyncOpenAI(**openai_client_options)
     async with EarningsAnalystEnv(base_url=environment_base_url) as env:
         reset_out = await env.reset()
         observation = reset_out.observation
         predicted, response_text = await predict_with_openai(
-            observation, client=client, model=model_name
         )
         step_out = await env.step(EarningsAnalystAction(prediction=predicted))
         step_observation = step_out.observation
-        observation_metadata = getattr(step_observation, "metadata", None) or {}
-        ground_truth_label = str(observation_metadata.get("ground_truth", ""))
         reward = step_out.reward
         if verbose:
             print(

     model_response_text: str | None = None
+def _normalize_prediction(model_text: str, valid: list[str] | None = None) -> str:
+    """Map model output to a canonical label or return as is for regression."""
+    if not valid:
+        return model_text.strip()
+    labels = valid
     normalized_model_text = str(model_text).strip().lower()
     for canonical_label in labels:
         if normalized_model_text == canonical_label.lower():
     valid_labels: list[str] | None = None,
 ) -> tuple[str, str]:
     """
+    Example Chat Completions call returning a JSON object.
     """
     user_content = build_user_content(obs)
     system_prompt = (
         "You are a financial analyst assistant. "
+        "Your task is to analyze the provided financial data and respond "
+        "EXACTLY as instructed in the Task Instruction. "
+        "Reply with a single JSON object only, no markdown or extra text."
     )
     completion = await client.chat.completions.create(
         model=model,
         response_format={"type": "json_object"},
     )
     response_text = (completion.choices[0].message.content or "").strip()
+    # Try to extract the primary value based on common keys
+    predicted = response_text
     try:
         parsed: dict[str, Any] = json.loads(response_text)
+        if isinstance(parsed, dict):
+            # Check for common return keys
+            for key in ["sentiment", "move", "label", "prediction"]:
+                if key in parsed:
+                    if valid_labels:
+                        predicted = _normalize_prediction(str(parsed[key]), valid_labels)
+                    else:
+                        predicted = str(parsed[key])
+                    break
     except (json.JSONDecodeError, TypeError, ValueError):
+        if valid_labels:
+            predicted = _normalize_prediction(response_text, valid_labels)
     return predicted, response_text
     openai_client_options: dict[str, Any] = {"api_key": api_key}
     if resolved_openai_base_url:
         openai_client_options["base_url"] = resolved_openai_base_url
+    if verbose:
+        print(f"DEBUG: Using base_url={resolved_openai_base_url or 'default'} model={model_name}")
     client = AsyncOpenAI(**openai_client_options)
     async with EarningsAnalystEnv(base_url=environment_base_url) as env:
         reset_out = await env.reset()
         observation = reset_out.observation
+        # We pass valid_labels if they exist in the observation/registry
+        # This implementation assumes the client can fetch labels or we hardcode.
+        # For simplicity, we'll try to use labels from metadata if available on reset
+        # Or just use None for regression.
+        valid_labels = getattr(observation, "label_values", None)
         predicted, response_text = await predict_with_openai(
+            observation, client=client, model=model_name, valid_labels=valid_labels
         )
         step_out = await env.step(EarningsAnalystAction(prediction=predicted))
         step_observation = step_out.observation
+        ground_truth_label = str(getattr(step_observation, "ground_truth", ""))
         reward = step_out.reward
         if verbose:
             print(

server/app.py CHANGED Viewed

@@ -32,7 +32,7 @@ except Exception as e:  # pragma: no cover
 try:
     from ..models import EarningsAnalystAction, EarningsAnalystObservation
     from .earnings_analyst_environment import EarningsAnalystEnvironment
-except ModuleNotFoundError:
     from models import EarningsAnalystAction, EarningsAnalystObservation
     from server.earnings_analyst_environment import EarningsAnalystEnvironment

 try:
     from ..models import EarningsAnalystAction, EarningsAnalystObservation
     from .earnings_analyst_environment import EarningsAnalystEnvironment
+except (ImportError, ModuleNotFoundError):
     from models import EarningsAnalystAction, EarningsAnalystObservation
     from server.earnings_analyst_environment import EarningsAnalystEnvironment

server/earnings_analyst_environment.py CHANGED Viewed

@@ -134,19 +134,21 @@ class EarningsAnalystEnvironment(Environment):
             )
         )
         return EarningsAnalystObservation(
             text_context={},
             numerical_context={},
             task_instruction=self._cfg["task_instruction"],
             done=True,
             reward=reward,
             metadata={
                 "task_id": self._task_id,
                 "predicted": action.prediction,
-                "ground_truth": ground_truth,
             },
         )
     @property
     def state(self) -> State:
         """Current environment state."""

             )
         )
         return EarningsAnalystObservation(
             text_context={},
             numerical_context={},
             task_instruction=self._cfg["task_instruction"],
             done=True,
             reward=reward,
+            ground_truth=ground_truth,
             metadata={
                 "task_id": self._task_id,
                 "predicted": action.prediction,
             },
         )
     @property
     def state(self) -> State:
         """Current environment state."""

tasks/__init__.py CHANGED Viewed

@@ -3,21 +3,11 @@
 from __future__ import annotations
 from .exceptions import TaskNotImplementedError
-from .registry import (
-    DEFAULT_TASK,
-    GRADERS,
-    TASKS,
-    TASK_IDS,
-    get_grader,
-    get_task_spec,
-)
 __all__ = [
-    "DEFAULT_TASK",
-    "GRADERS",
-    "TASKS",
-    "TASK_IDS",
     "TaskNotImplementedError",
-    "get_grader",
-    "get_task_spec",
 ]

 from __future__ import annotations
 from .exceptions import TaskNotImplementedError
+# Registry exports removed to avoid circular imports during dynamic task loading.
+# Use 'from tasks.registry import ...' instead.
 __all__ = [
     "TaskNotImplementedError",
 ]

tasks/grading.py CHANGED Viewed

@@ -94,3 +94,32 @@ def grade_exact(
     if _normalize_text(predicted) == _normalize_text(ground_truth):
         return 1.0
     return 0.0

     if _normalize_text(predicted) == _normalize_text(ground_truth):
         return 1.0
     return 0.0
def grade_regression(
    predicted: str,
    ground_truth: str,
    scale: float = 0.1,
) -> float:
    """
    Score a numerical prediction as ``exp(-abs(pred - gt) / scale)``.

    An exact match scores 1.0 and the score decays towards 0.0 as the
    absolute error grows; an error equal to ``scale`` scores ``exp(-1)``.

    Args:
        predicted: Predicted value; anything ``float()`` can parse.
        ground_truth: Target value; anything ``float()`` can parse.
        scale: Error size at which the score drops to ``exp(-1)``.
            Must be a positive, finite number.

    Returns:
        A score in ``[0.0, 1.0]``. Returns 0.0 when either value cannot be
        parsed as a number or parses to NaN / infinity.

    Raises:
        ValueError: If ``scale`` is not a positive, finite number.
    """
    import math

    # A zero scale divides by zero and a negative one produces scores above
    # 1.0 (or overflows), so reject it up front instead of failing later.
    if not scale > 0 or not math.isfinite(scale):
        raise ValueError(f"scale must be a positive finite number, got {scale!r}")
    try:
        gt_val = float(ground_truth)
        pred_val = float(predicted)
    except (ValueError, TypeError):
        return 0.0
    # float() happily parses "nan" and "inf"; without this guard the score
    # itself becomes NaN and poisons any average computed over episodes.
    if not (math.isfinite(gt_val) and math.isfinite(pred_val)):
        return 0.0
    error = abs(pred_val - gt_val)
    return math.exp(-error / scale)

tasks/next_quarter_move/grader.py CHANGED Viewed

@@ -1,10 +1,29 @@
-"""Grading for ``next_quarter_move`` — implement when the task is ready."""
 from __future__ import annotations
 def grade(predicted: str, ground_truth: str, label_values: list[str]) -> float:
-    raise NotImplementedError(
-        "Task 'next_quarter_move' is not implemented yet. "
-        "Implement grader logic in tasks/next_quarter_move/grader.py."
-    )

+"""Grading for ``next_quarter_move`` (regression)."""
 from __future__ import annotations
+import json
+import re
+from ..grading import grade_regression
 def grade(predicted: str, ground_truth: str, label_values: list[str]) -> float:
     """
     Grade a ``next_quarter_move`` prediction via ``grade_regression``.

     ``predicted`` may be a JSON object carrying a ``"move"`` key, a bare
     number, or free text that contains a number. The extracted value is
     passed to ``grade_regression`` together with ``ground_truth`` and
     ``scale=0.1``.

     Args:
         predicted: Raw prediction text (non-string values are stringified).
         ground_truth: Target value, forwarded unchanged to ``grade_regression``.
         label_values: Unused by this grader.

     Returns:
         The score returned by ``grade_regression`` for the extracted value.
     """
     _ = label_values
     # json.loads / re.search both require text; coerce so a numeric or None
     # prediction is scored instead of raising inside the fallback.
     text = predicted if isinstance(predicted, str) else str(predicted)
     try:
         data = json.loads(text)
     except (json.JSONDecodeError, TypeError):
         data = None
     if isinstance(data, dict) and "move" in data:
         pred_val_str = str(data["move"])
     else:
         # Fallback for anything that is not a {"move": ...} object: take the
         # first number in the text. The sign and optional exponent are
         # grouped so they apply to integers as well as decimals ("-5" must
         # not be read as "5").
         match = re.search(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?", text)
         pred_val_str = match.group() if match else text
     return grade_regression(pred_val_str, ground_truth, scale=0.1)

tasks/next_quarter_move/spec.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Task specification for ``next_quarter_move`` — fill in when implementing."""
 from __future__ import annotations
@@ -8,11 +8,28 @@ CANONICAL_TASK_ID = "next_quarter_move"
 SPEC: TaskSpec = {
     "task_id": CANONICAL_TASK_ID,
-    "implemented": False,
-    "text_cols": [],
-    "numerical_cols": [],
-    "label_col": "",
-    "label_values": [],
-    "task_instruction": "",
     "kind": "regression",
 }

+"""Task specification for ``next_quarter_move`` (predicting return until next qtr earnings)."""
 from __future__ import annotations
 SPEC: TaskSpec = {  # task definition for the ``next_quarter_move`` regression task
     "task_id": CANONICAL_TASK_ID,
+    "implemented": True,
+    "text_cols": [  # presumably dataset columns surfaced to the model as text — confirm against the environment
+        "earnings_transcript",
+        "press_release_8k_body",
+        "press_release_ex991",
+        "press_release_ex992",
+    ],
+    "numerical_cols": [  # presumably dataset columns surfaced as numeric context — confirm against the environment
+        "price_momentum_30d",
+        "price_momentum_90d",
+        "pct_from_52w_high_pt",
+        "avg_volume_20d",
+        "d_minus_1_close",
+    ],
+    "label_col": "move_next_qtr",  # NOTE(review): looks like the target column; units (fraction vs. percent) should match the instruction below — confirm
+    "label_values": [],  # Regression tasks don't use categorical labels
+    "task_instruction": (  # prompt text; requests a JSON object with a single "move" key
+        "Analyse the provided earnings call materials and predict the stock price movement "
+        "from this quarter's earnings date until the day before the next quarter's earnings date.\n\n"
+        "Returns a JSON object matching this exact schema:\n"
+        '{"move": <predicted float, e.g. 0.05 for 5% gain or -0.02 for 2% loss>}\n\n'
+        "Do not include any other keys or explanation."
+    ),
     "kind": "regression",
 }