Spaces:

ExplainabiliyForAATeam
/

explainability-tool-for-aa

Running

App Files Files Community

Anisha Bhatnagar committed on Oct 29

Commit

2194877

1 Parent(s): 8c133f5

Added structured response generation, as OpenAI was truncating feature names

Browse files

Files changed (1) hide show

utils/llm_feat_utils.py +24 -11

utils/llm_feat_utils.py CHANGED Viewed

@@ -32,19 +32,20 @@ def generate_feature_spans(client, text: str, features: list[str]) -> str:
     """
     Call to OpenAI to extract spans. Returns a JSON string.
     """
     prompt = f"""You are a linguistic specialist. Given a writing sample and a list of descriptive features, identify the exact text spans that demonstrate each feature.
     Important:
     - The headers like "Document 1:" etc are NOT part of the original text — ignore them.
     - For each feature, even if there is no match, return an empty list.
     - Only return exact phrases from the text.
-    Respond in JSON format like:
-    {{
-      "feature1": ["span1", "span2"],
-      "feature2": [],
-      …
-    }}
     Text:
     \"\"\"{text}\"\"\"
@@ -52,9 +53,9 @@ def generate_feature_spans(client, text: str, features: list[str]) -> str:
     Style Features:
     {features}
     """
-    print('==================>>>>>>>>>>')
-    print(prompt)
-    print('==================>>>>>>>>>>')
     response = client.chat.completions.create(
         model="gpt-4o",
         messages=[{"role":"user","content":prompt}]
@@ -71,8 +72,14 @@ def generate_feature_spans_with_retries(client, text: str, features: list[str])
     for attempt in range(MAX_ATTEMPTS):
         try:
             response_str = generate_feature_spans(client, text, features)
-            print(response_str)
             result = json.loads(response_str)
             return result
         except (JSONDecodeError, ValueError) as e:
             print(f"Attempt {attempt+1} failed: {e}")
@@ -116,7 +123,13 @@ def generate_feature_spans_cached(client, text: str, features: list[str], role:
         if h in cache:
             # print(f"Found feature: {feat}")
             found_feats_count += 1
-            result[feat] = cache[h]["spans"]
         else:
             # print(f"Missing feature: {feat}")
             missing_feats_count += 1

     """
     Call to OpenAI to extract spans. Returns a JSON string.
     """
+    # For some of the longer features, openai client was truncating the feature names, resulting in downstream errors.
+    # Adding structured JSON template to ensure all features are included properly.
+    features_json_template = {feature: [] for feature in features}
     prompt = f"""You are a linguistic specialist. Given a writing sample and a list of descriptive features, identify the exact text spans that demonstrate each feature.
     Important:
     - The headers like "Document 1:" etc are NOT part of the original text — ignore them.
     - For each feature, even if there is no match, return an empty list.
     - Only return exact phrases from the text.
+    - Use the EXACT feature names as JSON keys - do not paraphrase or shorten them.
+    Respond in this EXACT JSON format (use these exact keys, populate the lists with the extracted text spans):
+    {json.dumps(features_json_template, indent=2)}
     Text:
     \"\"\"{text}\"\"\"
     Style Features:
     {features}
     """
+    # print('==================>>>>>>>>>>')
+    # print(prompt)
+    # print('==================>>>>>>>>>>')
     response = client.chat.completions.create(
         model="gpt-4o",
         messages=[{"role":"user","content":prompt}]
     for attempt in range(MAX_ATTEMPTS):
         try:
             response_str = generate_feature_spans(client, text, features)
+            # print(response_str)
             result = json.loads(response_str)
+            # Additional check to ensure all requested features are present in the response correctly
+            if result.keys() != set(features):
+                print("Response keys do not match requested features. Retrying!")
+                response_str = generate_feature_spans(client, text, features)
+                # print(response_str)
+                result = json.loads(response_str)
             return result
         except (JSONDecodeError, ValueError) as e:
             print(f"Attempt {attempt+1} failed: {e}")
         if h in cache:
             # print(f"Found feature: {feat}")
             found_feats_count += 1
+            if cache[h]["spans"] is None:
+                print(f"Missing feature: {feat}")
+                missing_feats_count += 1
+                missing_feats.append(feat)
+            else:
+                result[feat] = cache[h]["spans"]
         else:
             # print(f"Missing feature: {feat}")
             missing_feats_count += 1