Spaces:
Sleeping
Sleeping
| """Module E: Demand Utility — estimates how frequently this knowledge is needed by users.""" | |
| from __future__ import annotations | |
| import json | |
| import anthropic | |
| from kvl.ingestor import Document | |
# Prompt template for the topic-extraction request issued by ``evaluate``.
# It is rendered with ``str.format(document=...)``, so the doubled braces
# (``{{`` / ``}}``) are format escapes that become literal JSON braces in the
# final prompt. The model is asked for a JSON array whose objects carry the
# keys ``evaluate`` reads back: "query_frequency", "priority_domain" and
# "unmet_need".
_TOPIC_PROMPT = """Analyze this document and identify its main topics and themes.
For each topic, estimate:
1. How frequently real users would search for or need this information (1-10 scale)
2. Whether this is a high-priority domain (healthcare, climate, food security, education, policy, etc.)
3. Whether there are significant unmet information needs in this area
Return ONLY a JSON array of objects:
[
{{
"topic": "topic name",
"description": "what aspect of the document this covers",
"query_frequency": <int 1-10>,
"priority_domain": <bool>,
"unmet_need": <bool>,
"rationale": "one sentence"
}},
...
]
Extract 5-8 topics.
Document:
{document}"""
# Prompt template for the holistic demand assessment requested by ``evaluate``.
# It is rendered with ``str.format(topics=..., summary=...)``; the doubled
# braces (``{{`` / ``}}``) are format escapes that become the literal braces
# of the JSON object the model is asked to return. ``evaluate`` reads the
# "demand_score" and "summary" keys of that object.
_DEMAND_SUMMARY_PROMPT = """Based on this knowledge document's topics and characteristics, assess its overall demand utility.
Topics identified:
{topics}
Document summary:
{summary}
Evaluate:
1. How broad vs. specialized is the audience for this knowledge?
2. Are there gaps in existing AI model knowledge that this document fills?
3. What is the geographic or linguistic scope?
4. How actionable is this knowledge for typical users?
Return ONLY JSON:
{{
"audience_breadth": <int 1-10>,
"knowledge_gap_fill": <int 1-10>,
"geographic_relevance": <int 1-10>,
"actionability": <int 1-10>,
"demand_score": <int 0-100>,
"summary": "two sentence demand assessment"
}}"""
def _call_claude(client: anthropic.Anthropic, prompt: str, model: str = "claude-sonnet-4-6") -> str:
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        client: Anthropic client used to issue the ``messages.create`` request.
        prompt: Fully rendered prompt, sent as the only user message.
        model: Model identifier forwarded to the request.

    Returns:
        The text of the first text-bearing content block with surrounding
        whitespace stripped, or ``""`` when the reply contains no text.
        Errors raised by the client call itself propagate unchanged.
    """
    msg = client.messages.create(
        model=model,
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}],
        system="You are an expert in knowledge management and information demand analysis.",
    )
    # Indexing content[0].text directly fails when the reply has no blocks or
    # when the first block carries no text. Pick the first block that does and
    # fall back to an empty string, which the caller's JSON parsing already
    # treats as "nothing usable".
    for block in msg.content or []:
        text = getattr(block, "text", None)
        if isinstance(text, str):
            return text.strip()
    return ""
| def _parse_json(raw: str) -> dict | list | None: | |
| raw = raw.strip() | |
| if raw.startswith("```"): | |
| raw = "\n".join(raw.split("\n")[1:]) | |
| raw = raw.rsplit("```", 1)[0] | |
| try: | |
| return json.loads(raw) | |
| except json.JSONDecodeError: | |
| return None | |
def _coerce_number(value, default: float) -> float:
    """Return ``value`` as a finite float, or ``default`` if it is not numeric."""
    if isinstance(value, bool):
        # bool is an int subclass; a true/false here is not a rating.
        return default
    if isinstance(value, (int, float)):
        number = float(value)
    elif isinstance(value, str):
        try:
            number = float(value.strip())
        except ValueError:
            return default
    else:
        return default
    # Reject NaN and infinities so they cannot poison the averages below.
    if number != number or number in (float("inf"), float("-inf")):
        return default
    return number


def _coerce_flag(value) -> bool:
    """Interpret a model-supplied boolean, including string spellings such as "false"."""
    if isinstance(value, str):
        return value.strip().lower() in {"true", "yes", "1"}
    return bool(value)


def evaluate(client: anthropic.Anthropic, doc: Document, progress_cb=None) -> dict:
    """Return demand utility score (0-100) and topic analysis.

    The document's first 5000 whitespace-separated words are sent for topic
    extraction. A base score is computed from the returned topics, then
    blended 40/60 with the ``demand_score`` from a second, holistic request.
    If that second reply cannot be parsed as a JSON object, the base score is
    used on its own.

    Args:
        client: Anthropic client passed through to ``_call_claude``.
        doc: Document to assess; ``raw``, ``title``, ``word_count`` and
            ``sections`` are read.
        progress_cb: Optional callable invoked with a status message before
            each of the two model requests.

    Returns:
        A dict with the keys ``"score"`` (int clamped to 0-100), ``"topics"``
        (list of topic dicts), ``"assessment"`` (the parsed holistic
        assessment, or ``{}``) and ``"summary"`` (str). When no topics can be
        extracted the score is 50 and ``"topics"`` is empty.
    """
    if progress_cb:
        progress_cb("Extracting document topics and themes...")

    text = " ".join(doc.raw.split()[:5000])
    raw_topics = _call_claude(client, _TOPIC_PROMPT.format(document=text), model="claude-haiku-4-5-20251001")
    parsed_topics = _parse_json(raw_topics)

    # Keep only well-formed entries: the model may return strings or nulls
    # inside the array, and those have no fields to score.
    topics = [t for t in parsed_topics if isinstance(t, dict)] if isinstance(parsed_topics, list) else []
    if not topics:
        return {
            "score": 50,
            "topics": [],
            "assessment": {},
            "summary": "Could not extract topics from document.",
        }

    # Base score from topic analysis. Ratings are clamped to the 1-10 scale
    # the prompt asks for, so one out-of-range value cannot skew the average.
    frequencies = [
        min(10.0, max(1.0, _coerce_number(topic.get("query_frequency"), 5.0)))
        for topic in topics
    ]
    avg_frequency = sum(frequencies) / len(topics)
    priority_bonus = sum(_coerce_flag(topic.get("priority_domain")) for topic in topics) / len(topics)
    unmet_bonus = sum(_coerce_flag(topic.get("unmet_need")) for topic in topics) / len(topics)
    base_score = (avg_frequency / 10) * 60 + priority_bonus * 20 + unmet_bonus * 20

    if progress_cb:
        progress_cb("Estimating overall demand utility...")

    # Get holistic demand assessment from Claude.
    doc_summary = f"Title: {doc.title}. Words: {doc.word_count}. Sections: {len(doc.sections)}."
    topics_str = json.dumps(topics, indent=2)
    raw_demand = _call_claude(client, _DEMAND_SUMMARY_PROMPT.format(topics=topics_str, summary=doc_summary))
    demand_assessment = _parse_json(raw_demand)

    if isinstance(demand_assessment, dict) and demand_assessment:
        # A missing or unusable demand_score falls back to the base score,
        # which makes the blend below equal to the base score.
        llm_score = min(100.0, max(0.0, _coerce_number(demand_assessment.get("demand_score"), base_score)))
        # Blend base score (40%) with LLM holistic score (60%).
        final_score = round(0.4 * base_score + 0.6 * llm_score)
        summary_value = demand_assessment.get("summary", "")
        assessment_summary = summary_value if isinstance(summary_value, str) else ""
    else:
        demand_assessment = {}
        final_score = round(base_score)
        assessment_summary = ""

    final_score = max(0, min(100, final_score))
    return {
        "score": final_score,
        "topics": topics,
        "assessment": demand_assessment,
        "summary": assessment_summary or f"Demand estimated from {len(topics)} topic areas. Base score: {round(base_score)}/100.",
    }