Spaces:
Running
Running
reverting back
Browse files- utils/model_generation.py +53 -19
utils/model_generation.py
CHANGED
|
@@ -7,26 +7,37 @@ import numpy as np
|
|
| 7 |
import os
|
| 8 |
|
| 9 |
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
PROMPT_TEMPLATES = {
|
| 14 |
"verbatim_sentiment": {
|
| 15 |
"system": (
|
| 16 |
-
"You are a compliance-grade policy analyst assistant. "
|
| 17 |
-
"
|
| 18 |
-
"
|
| 19 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
),
|
| 21 |
"user_template": """
|
| 22 |
Query: {query}
|
| 23 |
|
| 24 |
-
Deliverables:
|
| 25 |
-
1)
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
Topic hint: {topic_hint}
|
| 32 |
|
|
@@ -44,11 +55,26 @@ Context Sources:
|
|
| 44 |
"abstractive_summary": {
|
| 45 |
"system": (
|
| 46 |
"You are a policy analyst summarizing government documents for a general audience. "
|
| 47 |
-
"
|
| 48 |
-
"
|
|
|
|
|
|
|
| 49 |
),
|
| 50 |
"user_template": """Query: {query}
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
Topic hint: {topic_hint}
|
| 54 |
|
|
@@ -60,11 +86,20 @@ Context DOCS:
|
|
| 60 |
"followup_reasoning": {
|
| 61 |
"system": (
|
| 62 |
"You are an assistant that explains policy documents interactively, reasoning step-by-step. "
|
| 63 |
-
"
|
|
|
|
|
|
|
| 64 |
),
|
| 65 |
"user_template": """User query: {query}
|
| 66 |
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
Topic: {topic_hint}
|
| 70 |
|
|
@@ -72,11 +107,10 @@ DOCS:
|
|
| 72 |
{context_block}
|
| 73 |
"""
|
| 74 |
},
|
| 75 |
-
|
| 76 |
-
# Add more templates as needed
|
| 77 |
}
|
| 78 |
|
| 79 |
|
|
|
|
| 80 |
# --- LLM client ---
|
| 81 |
def get_do_completion(api_key, model_name, messages, temperature=0.2, max_tokens=800):
|
| 82 |
url = "https://inference.do-ai.run/v1/chat/completions"
|
|
|
|
| 7 |
import os
|
| 8 |
|
| 9 |
|
|
|
|
|
|
|
|
|
|
| 10 |
PROMPT_TEMPLATES = {
|
| 11 |
"verbatim_sentiment": {
|
| 12 |
"system": (
|
| 13 |
+
"You are a compliance-grade policy analyst assistant. Prime directive: be faithful to the provided sources. "
|
| 14 |
+
"Do NOT speculate. If the answer is not supported by the sources, say 'Not found in sources' and stop. "
|
| 15 |
+
"Every non-trivial claim MUST be grounded with an inline citation in the form (filename p.X). "
|
| 16 |
+
"Prefer 'unknown/not stated' over guessing. "
|
| 17 |
+
"Follow this Grounding Protocol before answering: (1) read Context Sources; (2) extract exact quotes; "
|
| 18 |
+
"(3) map each assertion to a citation; (4) list gaps and unknowns. "
|
| 19 |
+
"Write in a direct, corporate tone; skeptical, no sugar-coating. "
|
| 20 |
+
"Avoid hallucinations. Base everything strictly on the content provided. "
|
| 21 |
+
"Output must NOT be overly concise—use complete sentences and adequate context. Target depth: medium-to-long. "
|
| 22 |
+
"If sentiment or coherence inputs are disabled or empty, omit those sections entirely (do not mention they were omitted). "
|
| 23 |
+
"Do not write anything under the Sentiment or Coherence sections if the corresponding input is not available."
|
| 24 |
),
|
| 25 |
"user_template": """
|
| 26 |
Query: {query}
|
| 27 |
|
| 28 |
+
Deliverables (use the exact section headers below; omit any section whose input is empty/disabled):
|
| 29 |
+
1) Quoted Policy Excerpts
|
| 30 |
+
- Quote the necessary text and append citations like (filename p.X). Group by subtopic.
|
| 31 |
+
2) Sentiment Summary
|
| 32 |
+
- Using the Sentiment JSON, explain tone, gaps, penalties, and enforcement clarity in plain English. Do not invent fields that aren't present.
|
| 33 |
+
3) Coherence Assessment
|
| 34 |
+
- Provide this section only when the coherence report is enabled; using only that report, state on-topic vs off-topic and call out which sections were coherent, off-topic, or repeated.
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
Constraints:
|
| 38 |
+
- No external knowledge. No speculation. If a user ask is outside the sources, state 'Not found in sources.'
|
| 39 |
+
- Use full sentences (no telegraphic fragments).
|
| 40 |
+
- Each substantive statement has a citation.
|
| 41 |
|
| 42 |
Topic hint: {topic_hint}
|
| 43 |
|
|
|
|
| 55 |
"abstractive_summary": {
|
| 56 |
"system": (
|
| 57 |
"You are a policy analyst summarizing government documents for a general audience. "
|
| 58 |
+
"Faithfulness is mandatory: paraphrase only what is supported by the sources and cite key claims inline (filename p.X). "
|
| 59 |
+
"Avoid quotes unless legally binding language is essential. "
|
| 60 |
+
"Bias toward completeness over brevity; use full sentences and helpful structure. "
|
| 61 |
+
"If critical info is absent, say 'Not found in sources'—do not infer."
|
| 62 |
),
|
| 63 |
"user_template": """Query: {query}
|
| 64 |
|
| 65 |
+
Write a comprehensive, plain-language summary with these sections:
|
| 66 |
+
- What It Covers (scope, entities, timelines) [cite]
|
| 67 |
+
- Key Requirements & Controls (what must be done) [cite]
|
| 68 |
+
- Enforcement & Penalties (who enforces, how, consequences) [cite]
|
| 69 |
+
- Deadlines & Effective Dates (explicit dates or 'not stated') [cite]
|
| 70 |
+
- Exemptions/Thresholds (if any; otherwise 'not stated') [cite]
|
| 71 |
+
- Risks & Open Questions (gaps/ambiguities; no speculation)
|
| 72 |
+
- Action Checklist (practical steps derived strictly from the sources) [cite]
|
| 73 |
+
|
| 74 |
+
Rules:
|
| 75 |
+
- Use citations for non-obvious claims (filename p.X).
|
| 76 |
+
- Avoid quotes unless a phrase is legally binding.
|
| 77 |
+
- If the sources do not answer the query, state 'Not found in sources'.
|
| 78 |
|
| 79 |
Topic hint: {topic_hint}
|
| 80 |
|
|
|
|
| 86 |
"followup_reasoning": {
|
| 87 |
"system": (
|
| 88 |
"You are an assistant that explains policy documents interactively, reasoning step-by-step. "
|
| 89 |
+
"Be strictly faithful to the documents; if a detail is absent, say so. "
|
| 90 |
+
"Cite document filename and page for each factual claim. "
|
| 91 |
+
"Favor clarity and completeness over brevity; full sentences only."
|
| 92 |
),
|
| 93 |
"user_template": """User query: {query}
|
| 94 |
|
| 95 |
+
Answer step-by-step:
|
| 96 |
+
1) Direct Answer (what the sources actually support) with inline citations (filename p.X).
|
| 97 |
+
2) Why (short reasoning mapped to specific passages) with citations.
|
| 98 |
+
3) Edge Cases & Exceptions (only if present; otherwise 'not stated') with citations.
|
| 99 |
+
4) What’s Missing (explicitly note absent info; no speculation).
|
| 100 |
+
|
| 101 |
+
Then list 3–6 Follow-up Questions a reader might ask, and answer each using the docs.
|
| 102 |
+
- If a follow-up cannot be answered with the docs, respond: 'Not found in sources.'
|
| 103 |
|
| 104 |
Topic: {topic_hint}
|
| 105 |
|
|
|
|
| 107 |
{context_block}
|
| 108 |
"""
|
| 109 |
},
|
|
|
|
|
|
|
| 110 |
}
|
| 111 |
|
| 112 |
|
| 113 |
+
|
| 114 |
# --- LLM client ---
|
| 115 |
def get_do_completion(api_key, model_name, messages, temperature=0.2, max_tokens=800):
|
| 116 |
url = "https://inference.do-ai.run/v1/chat/completions"
|