"""
Cogni-Engine v1 — Compositional Language Generation
Builds natural language responses from reasoning chains.
NOT templates — sentences are composed from semantic components.
Every response is unique due to probabilistic construction.
Pipeline:
1. Structure Planning → Decide segment order
2. Segment Synthesis → Build each segment from chains
3. Confidence Modulation → Adjust certainty of language
4. Personality Adaptation → Apply system prompt style
5. Markdown Assembly → Final formatted output
"""
| import re | |
| import time | |
| import random | |
| from typing import List, Dict, Optional, Tuple, Any | |
| import numpy as np | |
| import config | |
| import utils | |
| from knowledge import Node, Edge, ReasoningChain | |
# ═══════════════════════════════════════════════════════════
# VOCABULARY POOLS
# ═══════════════════════════════════════════════════════════
# Each pool maps a semantic role to multiple surface forms.
# Selection is probabilistic — never the same output twice.
# Structure per language ("id" = Indonesian, "en" = English):
#   relation_verbs: relation key -> [(surface form, weight), ...]
#   connectors:     discourse role -> [(connector phrase, weight), ...]
#   confidence:     level key -> [(qualifier prefix, weight), ...]
#   uncertainty:    template strings with {topic}/{domains} placeholders
#   openings:       intent -> [(opening with {subject}, weight), ...]
#   closings:       [(closing sentence, weight), ...]
# Weights are relative preferences consumed by utils.weighted_choice.
VOCAB: Dict[str, Dict[str, Any]] = {
    # ── Indonesian ──
    "id": {
        # Relation verbs: how to express a relation as natural language
        "relation_verbs": {
            "is_a": [
                ("merupakan", 1.0), ("adalah", 0.9), ("termasuk dalam", 0.7),
                ("dikategorikan sebagai", 0.5), ("tergolong sebagai", 0.5),
                ("dapat diklasifikasikan sebagai", 0.3),
            ],
            "part_of": [
                ("merupakan bagian dari", 1.0), ("termasuk dalam", 0.8),
                ("menjadi bagian dari", 0.7), ("berada dalam cakupan", 0.4),
                ("tercakup dalam", 0.5),
            ],
            "has": [
                ("memiliki", 1.0), ("mempunyai", 0.8),
                ("dilengkapi dengan", 0.5), ("mencakup", 0.6),
                ("terdapat", 0.6),
            ],
            "located_in": [
                ("terletak di", 1.0), ("berada di", 0.9),
                ("berlokasi di", 0.7), ("terdapat di", 0.6),
                ("ditemukan di", 0.4),
            ],
            "created_by": [
                ("dibuat oleh", 1.0), ("diciptakan oleh", 0.8),
                ("dikembangkan oleh", 0.7), ("dirancang oleh", 0.5),
                ("dihasilkan oleh", 0.6),
            ],
            "used_for": [
                ("digunakan untuk", 1.0), ("berfungsi untuk", 0.8),
                ("dipakai untuk", 0.7), ("diterapkan untuk", 0.5),
                ("berguna untuk", 0.6), ("dimanfaatkan untuk", 0.5),
            ],
            "causes": [
                ("menyebabkan", 1.0), ("mengakibatkan", 0.8),
                ("menimbulkan", 0.7), ("memicu", 0.6),
                ("berdampak pada", 0.5), ("berujung pada", 0.4),
            ],
            "prevents": [
                ("mencegah", 1.0), ("menghambat", 0.7),
                ("menghalangi", 0.6), ("menangkal", 0.4),
            ],
            "requires": [
                ("membutuhkan", 1.0), ("memerlukan", 0.9),
                ("bergantung pada", 0.7), ("mensyaratkan", 0.5),
            ],
            "contains": [
                ("mengandung", 1.0), ("berisi", 0.9),
                ("terdiri dari", 0.7), ("mencakup", 0.6),
                ("memuat", 0.5),
            ],
            "follows": [
                ("diikuti oleh", 1.0), ("dilanjutkan dengan", 0.7),
                ("kemudian", 0.8), ("setelah itu", 0.6),
            ],
            "similar_to": [
                ("mirip dengan", 1.0), ("serupa dengan", 0.8),
                ("memiliki kemiripan dengan", 0.6),
                ("sejalan dengan", 0.5), ("analog dengan", 0.4),
            ],
            "opposite_of": [
                ("berlawanan dengan", 1.0), ("bertentangan dengan", 0.8),
                ("kebalikan dari", 0.7), ("berbeda dari", 0.5),
            ],
            "synonym_of": [
                ("sama dengan", 1.0), ("sinonim dari", 0.7),
                ("bermakna sama dengan", 0.6), ("setara dengan", 0.5),
            ],
            "defined_as": [
                ("didefinisikan sebagai", 1.0), ("diartikan sebagai", 0.8),
                ("bermakna", 0.7), ("berarti", 0.9),
                ("dapat dimaknai sebagai", 0.4),
            ],
            "example_of": [
                ("merupakan contoh dari", 1.0), ("adalah contoh", 0.8),
                ("salah satu bentuk dari", 0.6),
            ],
            "instance_of": [
                ("termasuk kategori", 1.0), ("merupakan bagian dari kelompok", 0.7),
                ("masuk dalam klasifikasi", 0.5),
            ],
            "analogous_to": [
                ("dapat dianalogikan dengan", 1.0), ("seperti halnya", 0.8),
                ("sebanding dengan", 0.7), ("ibarat", 0.6),
            ],
            "related_to": [
                ("berkaitan dengan", 1.0), ("berhubungan dengan", 0.9),
                ("terkait dengan", 0.8), ("memiliki hubungan dengan", 0.6),
                ("ada kaitannya dengan", 0.5),
            ],
            # Hedged phrasings for machine-inferred (not asserted) relations.
            "inferred_relation": [
                ("tampaknya berkaitan dengan", 1.0),
                ("kemungkinan berhubungan dengan", 0.8),
                ("sepertinya terkait dengan", 0.7),
            ],
        },
        # Connectors between segments
        "connectors": {
            "addition": [
                ("Selain itu, ", 1.0), ("Di samping itu, ", 0.7),
                ("Lebih lanjut, ", 0.6), ("Tidak hanya itu, ", 0.5),
                ("Hal ini juga ", 0.4), ("Ditambah lagi, ", 0.4),
            ],
            "contrast": [
                ("Namun, ", 1.0), ("Akan tetapi, ", 0.7),
                ("Meskipun demikian, ", 0.5), ("Di sisi lain, ", 0.6),
                ("Sebaliknya, ", 0.4),
            ],
            "cause": [
                ("Oleh karena itu, ", 1.0), ("Karena itu, ", 0.8),
                ("Hal ini menyebabkan ", 0.6), ("Akibatnya, ", 0.5),
                ("Dengan demikian, ", 0.6),
            ],
            "elaboration": [
                ("Lebih spesifik, ", 1.0), ("Dengan kata lain, ", 0.8),
                ("Secara lebih rinci, ", 0.6), ("Artinya, ", 0.7),
                ("Dalam konteks ini, ", 0.5),
            ],
            "example": [
                ("Sebagai contoh, ", 1.0), ("Misalnya, ", 0.9),
                ("Contohnya, ", 0.7), ("Salah satu contohnya, ", 0.5),
            ],
            "conclusion": [
                ("Secara keseluruhan, ", 1.0), ("Pada intinya, ", 0.8),
                ("Kesimpulannya, ", 0.7), ("Ringkasnya, ", 0.5),
                ("Dapat disimpulkan bahwa ", 0.4),
            ],
            "neutral": [
                ("", 1.0), ("Perlu diketahui, ", 0.4),
                ("Adapun ", 0.3), ("Terkait hal itu, ", 0.4),
            ],
        },
        # Confidence qualifiers
        "confidence": {
            "high": [
                ("", 1.0),  # No qualifier needed — assertive
            ],
            "medium": [
                ("Berdasarkan pemahaman saya, ", 1.0),
                ("Dari informasi yang ada, ", 0.8),
                ("Sejauh yang saya ketahui, ", 0.7),
                ("Menurut pengetahuan saya, ", 0.6),
            ],
            "low": [
                ("Mungkin ", 1.0), ("Bisa jadi ", 0.8),
                ("Kemungkinan besar ", 0.6),
                ("Terdapat kemungkinan bahwa ", 0.5),
                ("Sepertinya ", 0.7),
            ],
            "very_low": [
                ("Saya belum memiliki informasi yang cukup, namun ", 1.0),
                ("Pengetahuan saya terbatas mengenai hal ini, tapi ", 0.8),
                ("Saya kurang yakin, namun ", 0.7),
            ],
        },
        # Uncertainty acknowledgment components
        "uncertainty": {
            "acknowledge": [
                "Saat ini saya belum memiliki pengetahuan yang cukup mendalam mengenai {topic}",
                "Topik {topic} belum sepenuhnya tercakup dalam pemahaman saya",
                "{topic} belum menjadi bagian yang saya pahami secara komprehensif",
                "Pengetahuan saya mengenai {topic} masih terbatas",
                "Saya belum memiliki cukup informasi untuk membahas {topic} secara mendalam",
            ],
            "domain_ref": [
                "Pemahaman saya lebih banyak mencakup topik seputar {domains}",
                "Saya lebih memahami hal-hal terkait {domains}",
                "Area pengetahuan saya saat ini lebih terfokus pada {domains}",
                "Bidang yang lebih saya kuasai meliputi {domains}",
            ],
            "suggestion": [
                "Jika kamu bisa memberikan informasi mengenai {topic}, saya akan mempelajarinya",
                "Dengan tambahan data tentang {topic}, saya bisa mengembangkan pemahaman di area tersebut",
                "Saya terbuka untuk mempelajari {topic} jika diberikan informasi lebih lanjut",
                "Menambahkan data tentang {topic} akan membantu saya memahami topik ini",
            ],
        },
        # Opening phrases per intent
        "openings": {
            "explain": [
                ("{subject} ", 1.0),
                ("Mengenai {subject}, ", 0.7),
                ("Berbicara tentang {subject}, ", 0.5),
                ("Terkait {subject}, ", 0.6),
            ],
            "define": [
                ("{subject} ", 1.0),
                ("Secara definisi, {subject} ", 0.6),
                ("Yang dimaksud dengan {subject} ", 0.5),
            ],
            "relation": [
                ("Hubungan antara {subject} ", 1.0),
                ("Keterkaitan {subject} ", 0.7),
                ("{subject} saling berhubungan — ", 0.5),
            ],
            "cause": [
                ("Alasan di balik {subject} ", 1.0),
                ("Hal ini terjadi karena ", 0.7),
                ("{subject} disebabkan oleh ", 0.6),
            ],
            "compare": [
                ("Perbandingan antara {subject} ", 1.0),
                ("Jika membandingkan {subject}, ", 0.7),
                ("Terdapat perbedaan dan persamaan — ", 0.5),
            ],
            "list": [
                ("Berikut ini {subject}: ", 1.0),
                ("Beberapa {subject} yang dapat disebutkan: ", 0.7),
                ("Terdapat beberapa {subject}, antara lain: ", 0.6),
            ],
            "how_to": [
                ("Untuk {subject}, ", 1.0),
                ("Proses {subject} melibatkan ", 0.7),
                ("Langkah-langkah {subject}: ", 0.6),
            ],
            "greeting": [
                ("Halo! ", 1.0), ("Hai! ", 0.8),
                ("Halo, senang bisa membantu! ", 0.6),
            ],
            "general": [
                ("", 1.0),
                ("Mengenai hal itu, ", 0.5),
            ],
        },
        # Closing phrases
        "closings": [
            ("", 1.0),  # Often no closing needed
            ("Semoga penjelasan ini membantu.", 0.3),
            ("Jika ada yang ingin ditanyakan lebih lanjut, silakan.", 0.2),
        ],
    },
    # ── English ──
    # NOTE: intentionally sparser than "id"; missing relation keys fall back
    # to whatever the consuming code does for unknown relations.
    "en": {
        "relation_verbs": {
            "is_a": [
                ("is", 1.0), ("is a type of", 0.8), ("is classified as", 0.6),
                ("belongs to the category of", 0.4), ("can be described as", 0.5),
            ],
            "part_of": [
                ("is part of", 1.0), ("belongs to", 0.8),
                ("falls within", 0.6), ("is included in", 0.5),
            ],
            "has": [
                ("has", 1.0), ("possesses", 0.6), ("features", 0.5),
                ("includes", 0.7), ("contains", 0.6),
            ],
            "located_in": [
                ("is located in", 1.0), ("can be found in", 0.7),
                ("is situated in", 0.6), ("resides in", 0.4),
            ],
            "used_for": [
                ("is used for", 1.0), ("serves the purpose of", 0.6),
                ("is utilized for", 0.5), ("functions as", 0.5),
            ],
            "causes": [
                ("causes", 1.0), ("leads to", 0.8), ("results in", 0.7),
                ("brings about", 0.5), ("triggers", 0.6),
            ],
            "similar_to": [
                ("is similar to", 1.0), ("resembles", 0.7),
                ("shares similarities with", 0.6), ("is akin to", 0.4),
            ],
            "related_to": [
                ("is related to", 1.0), ("is connected to", 0.8),
                ("is associated with", 0.7), ("has ties to", 0.5),
            ],
            "defined_as": [
                ("is defined as", 1.0), ("means", 0.9), ("refers to", 0.7),
                ("can be understood as", 0.5),
            ],
            "created_by": [
                ("was created by", 1.0), ("was developed by", 0.8),
                ("was designed by", 0.6),
            ],
            "requires": [
                ("requires", 1.0), ("needs", 0.8), ("depends on", 0.7),
            ],
            "follows": [
                ("is followed by", 1.0), ("comes after", 0.7), ("then", 0.8),
            ],
            "opposite_of": [
                ("is the opposite of", 1.0), ("contrasts with", 0.7),
            ],
            "synonym_of": [
                ("is synonymous with", 1.0), ("means the same as", 0.7),
            ],
            "contains": [
                ("contains", 1.0), ("comprises", 0.7), ("consists of", 0.6),
            ],
            "prevents": [
                ("prevents", 1.0), ("inhibits", 0.6), ("blocks", 0.5),
            ],
            "example_of": [
                ("is an example of", 1.0), ("exemplifies", 0.6),
            ],
            "instance_of": [
                ("is an instance of", 1.0), ("is a member of", 0.7),
            ],
            "analogous_to": [
                ("is analogous to", 1.0), ("is like", 0.8), ("is comparable to", 0.6),
            ],
            "inferred_relation": [
                ("appears to be related to", 1.0),
                ("seems connected to", 0.8),
            ],
        },
        "connectors": {
            "addition": [
                ("Furthermore, ", 1.0), ("Additionally, ", 0.8),
                ("Moreover, ", 0.7), ("In addition, ", 0.6),
            ],
            "contrast": [
                ("However, ", 1.0), ("On the other hand, ", 0.7),
                ("Nevertheless, ", 0.5), ("Conversely, ", 0.4),
            ],
            "cause": [
                ("Therefore, ", 1.0), ("As a result, ", 0.8),
                ("Consequently, ", 0.6), ("Thus, ", 0.7),
            ],
            "elaboration": [
                ("Specifically, ", 1.0), ("In other words, ", 0.8),
                ("More precisely, ", 0.6), ("That is, ", 0.7),
            ],
            "example": [
                ("For example, ", 1.0), ("For instance, ", 0.8),
                ("Such as ", 0.5),
            ],
            "conclusion": [
                ("Overall, ", 1.0), ("In summary, ", 0.8),
                ("To sum up, ", 0.6), ("In essence, ", 0.5),
            ],
            "neutral": [
                ("", 1.0), ("It is worth noting that ", 0.4),
            ],
        },
        "confidence": {
            "high": [("", 1.0)],
            "medium": [
                ("Based on my understanding, ", 1.0),
                ("From the information available, ", 0.8),
                ("As far as I know, ", 0.7),
            ],
            "low": [
                ("Possibly, ", 1.0), ("It might be that ", 0.8),
                ("There's a chance that ", 0.6), ("Perhaps ", 0.7),
            ],
            "very_low": [
                ("I don't have sufficient information, but ", 1.0),
                ("My knowledge on this is limited, however ", 0.8),
            ],
        },
        "uncertainty": {
            "acknowledge": [
                "I don't currently have sufficient knowledge about {topic}",
                "The topic of {topic} is not yet well covered in my understanding",
                "My knowledge regarding {topic} is still limited",
            ],
            "domain_ref": [
                "My understanding is more focused on {domains}",
                "I'm more knowledgeable about topics like {domains}",
                "The areas I know better include {domains}",
            ],
            "suggestion": [
                "If you could provide information about {topic}, I'd be able to learn about it",
                "Adding data about {topic} would help me understand this area better",
            ],
        },
        "openings": {
            "explain": [
                ("{subject} ", 1.0), ("Regarding {subject}, ", 0.7),
                ("When it comes to {subject}, ", 0.5),
            ],
            "define": [
                ("{subject} ", 1.0), ("By definition, {subject} ", 0.6),
            ],
            "relation": [
                ("The relationship between {subject} ", 1.0),
                ("The connection of {subject} ", 0.7),
            ],
            "cause": [
                ("The reason behind {subject} ", 1.0),
                ("This happens because ", 0.7),
            ],
            "compare": [
                ("Comparing {subject}, ", 1.0),
                ("When contrasting {subject}, ", 0.6),
            ],
            "list": [
                ("Here are {subject}: ", 1.0),
                ("The following {subject} can be noted: ", 0.6),
            ],
            "how_to": [
                ("To {subject}, ", 1.0),
                ("The process of {subject} involves ", 0.7),
            ],
            "greeting": [
                ("Hello! ", 1.0), ("Hi there! ", 0.8),
                ("Hello, happy to help! ", 0.6),
            ],
            "general": [("", 1.0)],
        },
        "closings": [
            ("", 1.0),
            ("I hope this helps.", 0.3),
            ("Feel free to ask if you need more details.", 0.2),
        ],
    },
}
# ═══════════════════════════════════════════════════════════
# RESPONSE STRUCTURE TEMPLATES
# ═══════════════════════════════════════════════════════════
# Not rigid — these define POSSIBLE segment orderings.
# Actual ordering is selected probabilistically.
# Each entry: intent -> [(ordered list of segment types, weight), ...];
# segment type names must match the builder keys in
# LanguageGenerator._build_segment.
STRUCTURE_TEMPLATES: Dict[str, List[Tuple[List[str], float]]] = {
    "explain": [
        (["introduction", "main_explanation", "supporting_detail", "conclusion"], 1.0),
        (["introduction", "main_explanation", "elaboration"], 0.8),
        (["introduction", "main_explanation", "example", "conclusion"], 0.7),
        (["main_explanation", "supporting_detail", "elaboration"], 0.6),
        (["introduction", "main_explanation", "context"], 0.5),
    ],
    "define": [
        (["introduction", "main_explanation"], 1.0),
        (["main_explanation", "example"], 0.8),
        (["introduction", "main_explanation", "elaboration"], 0.6),
    ],
    "relation": [
        (["introduction", "main_explanation", "supporting_detail"], 1.0),
        (["introduction", "main_explanation", "inference", "conclusion"], 0.8),
        (["main_explanation", "supporting_detail", "context"], 0.6),
    ],
    "cause": [
        (["introduction", "main_explanation", "supporting_detail"], 1.0),
        (["main_explanation", "inference", "conclusion"], 0.7),
        (["introduction", "main_explanation", "elaboration", "conclusion"], 0.6),
    ],
    "compare": [
        (["introduction", "main_explanation", "comparison", "conclusion"], 1.0),
        (["main_explanation", "comparison", "supporting_detail"], 0.8),
    ],
    "list": [
        (["introduction", "main_explanation"], 1.0),
        (["introduction", "main_explanation", "elaboration"], 0.6),
    ],
    "how_to": [
        (["introduction", "main_explanation", "supporting_detail"], 1.0),
        (["main_explanation", "elaboration", "conclusion"], 0.7),
    ],
    "greeting": [
        (["introduction"], 1.0),
        (["introduction", "suggestion"], 0.5),
    ],
    "general": [
        (["main_explanation", "supporting_detail"], 1.0),
        (["introduction", "main_explanation", "conclusion"], 0.7),
        (["main_explanation", "elaboration"], 0.6),
    ],
    "opinion": [
        (["introduction", "main_explanation", "supporting_detail", "conclusion"], 1.0),
        (["main_explanation", "context", "conclusion"], 0.7),
    ],
    "followup": [
        (["main_explanation", "supporting_detail"], 1.0),
        (["main_explanation", "elaboration", "conclusion"], 0.7),
    ],
    # When confidence is very low — special structure
    # (selected by generate_response when overall confidence < CONFIDENCE_LOW)
    "_uncertain": [
        (["acknowledgment_of_uncertainty", "context", "suggestion"], 1.0),
        (["acknowledgment_of_uncertainty", "suggestion"], 0.8),
        (["context", "acknowledgment_of_uncertainty", "suggestion"], 0.6),
        (["acknowledgment_of_uncertainty", "context"], 0.5),
    ],
}
| # ═══════════════════════════════════════════════════════════ | |
| # LANGUAGE GENERATOR CLASS | |
| # ═══════════════════════════════════════════════════════════ | |
| class LanguageGenerator: | |
| """ | |
| Compositional language generation engine. | |
| Builds responses from reasoning chains using probabilistic | |
| segment planning and compositional sentence synthesis. | |
| """ | |
| def __init__(self): | |
| self._seed = utils.variation_seed() | |
| def generate_response( | |
| self, | |
| chains: List[ReasoningChain], | |
| query_analysis: dict, | |
| personality: dict, | |
| all_nodes: dict, | |
| all_edges: dict, | |
| graph_stats: dict = None | |
| ) -> str: | |
| """ | |
| Main entry point: generate a complete response. | |
| Args: | |
| chains: Reasoning chains from brain's reasoning step | |
| query_analysis: {intent, entities, confidence, query_text} | |
| personality: Parsed system prompt parameters | |
| all_nodes: Reference to graph nodes dict | |
| all_edges: Reference to graph edges dict | |
| graph_stats: Optional graph statistics | |
| Returns: | |
| Markdown-formatted response string | |
| """ | |
| self._seed = utils.variation_seed() | |
| self._rng = utils.seeded_random(self._seed) | |
| intent = query_analysis.get("intent", "general") | |
| confidence = query_analysis.get("confidence", 0.5) | |
| entities = query_analysis.get("entities", []) | |
| lang = personality.get("language", config.DEFAULT_LANGUAGE) | |
| temperature = query_analysis.get("temperature", config.DEFAULT_TEMPERATURE) | |
| # Get vocabulary for target language | |
| vocab = VOCAB.get(lang, VOCAB["id"]) | |
| # ── Handle greeting specially ── | |
| if intent == "greeting": | |
| return self._generate_greeting(personality, vocab, lang) | |
| # ── Determine if we know enough to answer ── | |
| overall_confidence = self._calculate_overall_confidence(chains, confidence) | |
| # ── Choose response structure ── | |
| if overall_confidence < config.CONFIDENCE_LOW: | |
| structure = self._select_structure("_uncertain", temperature) | |
| else: | |
| structure = self._select_structure(intent, temperature) | |
| # ── Build segments ── | |
| segments = [] | |
| for segment_type in structure: | |
| segment_text = self._build_segment( | |
| segment_type=segment_type, | |
| chains=chains, | |
| query_analysis=query_analysis, | |
| personality=personality, | |
| vocab=vocab, | |
| all_nodes=all_nodes, | |
| all_edges=all_edges, | |
| overall_confidence=overall_confidence, | |
| graph_stats=graph_stats, | |
| lang=lang | |
| ) | |
| if segment_text: | |
| segments.append((segment_type, segment_text)) | |
| # ── Connect segments ── | |
| connected = self._connect_segments(segments, vocab, personality) | |
| # ── Apply personality ── | |
| final = self._apply_personality(connected, personality, lang) | |
| # ── Format as markdown ── | |
| final = self._format_markdown(final, segments, intent) | |
| return final.strip() | |
| # ─────────────────────────────────────────────────── | |
| # CONFIDENCE CALCULATION | |
| # ─────────────────────────────────────────────────── | |
| def _calculate_overall_confidence( | |
| self, | |
| chains: List[ReasoningChain], | |
| query_confidence: float | |
| ) -> float: | |
| """Calculate overall response confidence from chains and query match.""" | |
| if not chains: | |
| return query_confidence * 0.3 | |
| chain_confidences = [c.confidence for c in chains] | |
| avg_chain = sum(chain_confidences) / len(chain_confidences) | |
| max_chain = max(chain_confidences) | |
| # Weighted: max matters more than average | |
| combined = (max_chain * 0.6 + avg_chain * 0.4) * query_confidence | |
| return utils.clamp(combined, 0.0, 1.0) | |
| def _get_confidence_level(self, confidence: float) -> str: | |
| """Map confidence float to level string.""" | |
| if confidence >= config.CONFIDENCE_HIGH: | |
| return "high" | |
| elif confidence >= config.CONFIDENCE_MEDIUM: | |
| return "medium" | |
| elif confidence >= config.CONFIDENCE_LOW: | |
| return "low" | |
| return "very_low" | |
| # ─────────────────────────────────────────────────── | |
| # STRUCTURE PLANNING | |
| # ─────────────────────────────────────────────────── | |
| def _select_structure( | |
| self, intent: str, temperature: float | |
| ) -> List[str]: | |
| """Select a response structure probabilistically.""" | |
| templates = STRUCTURE_TEMPLATES.get(intent, STRUCTURE_TEMPLATES["general"]) | |
| structures = [t[0] for t in templates] | |
| weights = [t[1] for t in templates] | |
| return utils.weighted_choice(structures, weights, temperature) | |
| # ─────────────────────────────────────────────────── | |
| # SEGMENT BUILDING | |
| # ─────────────────────────────────────────────────── | |
| def _build_segment( | |
| self, | |
| segment_type: str, | |
| chains: List[ReasoningChain], | |
| query_analysis: dict, | |
| personality: dict, | |
| vocab: dict, | |
| all_nodes: dict, | |
| all_edges: dict, | |
| overall_confidence: float, | |
| graph_stats: dict, | |
| lang: str | |
| ) -> str: | |
| """Build a single response segment.""" | |
| builders = { | |
| "introduction": self._build_introduction, | |
| "main_explanation": self._build_main_explanation, | |
| "supporting_detail": self._build_supporting_detail, | |
| "elaboration": self._build_elaboration, | |
| "example": self._build_example, | |
| "comparison": self._build_comparison, | |
| "inference": self._build_inference, | |
| "context": self._build_context, | |
| "conclusion": self._build_conclusion, | |
| "suggestion": self._build_suggestion, | |
| "acknowledgment_of_uncertainty": self._build_uncertainty, | |
| } | |
| builder = builders.get(segment_type) | |
| if not builder: | |
| return "" | |
| return builder( | |
| chains=chains, | |
| query_analysis=query_analysis, | |
| personality=personality, | |
| vocab=vocab, | |
| all_nodes=all_nodes, | |
| all_edges=all_edges, | |
| confidence=overall_confidence, | |
| graph_stats=graph_stats, | |
| lang=lang | |
| ) | |
| def _build_introduction(self, chains, query_analysis, vocab, all_nodes, all_edges, confidence, **kwargs) -> str: | |
| """Build opening segment.""" | |
| intent = query_analysis.get("intent", "general") | |
| entities = query_analysis.get("entities", []) | |
| subject = ", ".join(entities[:2]) if entities else "hal tersebut" | |
| # Select opening phrase | |
| openings = vocab.get("openings", {}).get(intent, vocab["openings"]["general"]) | |
| opening_texts = [o[0] for o in openings] | |
| opening_weights = [o[1] for o in openings] | |
| opening = utils.weighted_choice(opening_texts, opening_weights, 0.7) | |
| opening = opening.replace("{subject}", subject) | |
| # Add confidence qualifier | |
| conf_level = self._get_confidence_level(confidence) | |
| qualifiers = vocab.get("confidence", {}).get(conf_level, [("", 1.0)]) | |
| qualifier_texts = [q[0] for q in qualifiers] | |
| qualifier_weights = [q[1] for q in qualifiers] | |
| qualifier = utils.weighted_choice(qualifier_texts, qualifier_weights, 0.7) | |
| # Get first chain's starting content | |
| first_content = "" | |
| if chains: | |
| first_path = chains[0].path | |
| for item_id in first_path: | |
| node = all_nodes.get(item_id) | |
| if node: | |
| first_content = node.content | |
| break | |
| if first_content and confidence >= config.CONFIDENCE_MEDIUM: | |
| # Build a sentence from the first node | |
| verb = self._get_relation_verb(chains, all_edges, vocab, 0) | |
| if verb: | |
| return f"{qualifier}{opening}{verb} {self._continue_from_chain(chains[0], all_nodes, all_edges, vocab, max_nodes=2)}" | |
| return f"{qualifier}{opening}{first_content}." | |
| elif first_content: | |
| return f"{qualifier}{opening.rstrip()} " | |
| return f"{qualifier}{opening}".strip() | |
| def _build_main_explanation(self, chains, query_analysis, vocab, all_nodes, all_edges, confidence, **kwargs) -> str: | |
| """Build the core explanation segment from primary reasoning chain.""" | |
| if not chains: | |
| return "" | |
| primary_chain = chains[0] | |
| return self._chain_to_natural_language( | |
| primary_chain, all_nodes, all_edges, vocab, confidence | |
| ) | |
| def _build_supporting_detail(self, chains, query_analysis, vocab, all_nodes, all_edges, confidence, **kwargs) -> str: | |
| """Build supporting detail from secondary chains.""" | |
| if len(chains) < 2: | |
| return "" | |
| secondary_chain = chains[1] | |
| text = self._chain_to_natural_language( | |
| secondary_chain, all_nodes, all_edges, vocab, confidence | |
| ) | |
| return text | |
    def _build_elaboration(self, chains, query_analysis, vocab, all_nodes, all_edges, confidence, **kwargs) -> str:
        """Build elaboration — deeper explanation of a point.

        Takes the longest chain (>= 5 path items), keeps only nodes from
        roughly its last two-thirds, and joins up to three of them with
        relation verbs where an edge can be located.
        """
        if not chains:
            return ""
        # Use longest chain for elaboration
        longest = max(chains, key=lambda c: len(c.path))
        if len(longest.path) < 5:
            return ""
        # Focus on the middle/end of the chain (deeper reasoning)
        mid_start = len(longest.path) // 3
        relevant_nodes = []
        for item_id in longest.path[mid_start:]:
            node = all_nodes.get(item_id)
            if node:
                relevant_nodes.append(node)
        if len(relevant_nodes) < 2:
            return ""
        parts = []
        for i, node in enumerate(relevant_nodes[:3]):
            if i > 0:
                # Find edge between this and previous
                # NOTE(review): this index arithmetic assumes path alternates
                # node/edge with nodes at even offsets from mid_start; if
                # mid_start happens to land on an edge id, the computed index
                # points at a node id and the edge lookup silently misses —
                # confirm against ReasoningChain.path layout.
                # When the index is out of range the key becomes None and
                # all_edges.get(None) is relied on to return no edge.
                edge = all_edges.get(
                    longest.path[mid_start + i * 2 - 1]
                    if mid_start + i * 2 - 1 < len(longest.path) else None
                )
                if edge:
                    verb = self._select_relation_verb(edge.relation, vocab)
                    parts.append(f"{verb} {node.content}")
                else:
                    # No connecting edge found — just append the bare content.
                    parts.append(node.content)
            else:
                parts.append(node.content)
        return " ".join(parts) + "."
| def _build_example(self, chains, vocab, all_nodes, all_edges, **kwargs) -> str: | |
| """Build example segment from chains.""" | |
| if not chains: | |
| return "" | |
| # Find nodes of type entity/fact that could serve as examples | |
| example_nodes = [] | |
| for chain in chains: | |
| for item_id in chain.path: | |
| node = all_nodes.get(item_id) | |
| if node and node.type in ("entity", "fact") and len(node.content) < 200: | |
| example_nodes.append(node) | |
| if not example_nodes: | |
| return "" | |
| # Pick 1-2 examples | |
| selected = example_nodes[:2] if len(example_nodes) > 1 else example_nodes[:1] | |
| example_texts = [n.content for n in selected] | |
| return ", ".join(example_texts) + "." | |
| def _build_comparison(self, chains, vocab, all_nodes, all_edges, **kwargs) -> str: | |
| """Build comparison segment between entities in chains.""" | |
| if len(chains) < 2: | |
| return "" | |
| # Get first node from each of two chains | |
| nodes_a = [all_nodes.get(i) for i in chains[0].path if i in all_nodes] | |
| nodes_b = [all_nodes.get(i) for i in chains[1].path if i in all_nodes] | |
| if not nodes_a or not nodes_b: | |
| return "" | |
| a_content = nodes_a[0].content | |
| b_content = nodes_b[0].content | |
| lang = kwargs.get("lang", "id") | |
| if lang == "id": | |
| return f"{a_content} dan {b_content} memiliki keterkaitan masing-masing dalam konteks ini." | |
| return f"{a_content} and {b_content} each have their own relevance in this context." | |
| def _build_inference(self, chains, vocab, all_nodes, all_edges, confidence, **kwargs) -> str: | |
| """Build inference segment — what we can deduce.""" | |
| inferred_chains = [c for c in chains if any( | |
| all_edges.get(i, Edge("", "", "")).source == "inferred" | |
| for i in c.path if i in all_edges | |
| )] | |
| if not inferred_chains: | |
| return "" | |
| chain = inferred_chains[0] | |
| text = self._chain_to_natural_language( | |
| chain, all_nodes, all_edges, vocab, confidence * 0.8 | |
| ) | |
| lang = kwargs.get("lang", "id") | |
| if lang == "id": | |
| prefix = self._rng.choice([ | |
| "Dari sini dapat disimpulkan bahwa ", | |
| "Hal ini menunjukkan bahwa ", | |
| "Dapat dipahami bahwa ", | |
| ]) | |
| else: | |
| prefix = self._rng.choice([ | |
| "From this we can conclude that ", | |
| "This suggests that ", | |
| "It can be understood that ", | |
| ]) | |
| return prefix + text.lstrip() if text else "" | |
| def _build_context(self, chains, vocab, all_nodes, graph_stats, **kwargs) -> str: | |
| """Build context segment — what the AI knows about.""" | |
| if not graph_stats: | |
| return "" | |
| lang = kwargs.get("lang", "id") | |
| # Find top domains (high-weight concept nodes) | |
| concept_nodes = [ | |
| n for n in all_nodes.values() | |
| if n.type == "concept" and n.weight > 0.7 | |
| ] | |
| concept_nodes.sort(key=lambda n: n.weight * n.connections, reverse=True) | |
| top_domains = [n.content for n in concept_nodes[:5]] | |
| if not top_domains: | |
| return "" | |
| domains_str = ", ".join(top_domains[:3]) | |
| templates = vocab.get("uncertainty", {}).get("domain_ref", []) | |
| if templates: | |
| template = self._rng.choice(templates) | |
| return template.replace("{domains}", domains_str) | |
| return "" | |
| def _build_conclusion(self, chains, vocab, all_nodes, all_edges, confidence, **kwargs) -> str: | |
| """Build conclusion segment.""" | |
| if not chains: | |
| return "" | |
| # Summarize key point from highest-confidence chain | |
| best_chain = max(chains, key=lambda c: c.confidence) | |
| nodes_in_chain = [ | |
| all_nodes.get(i) for i in best_chain.path if i in all_nodes | |
| ] | |
| if len(nodes_in_chain) < 2: | |
| return "" | |
| first = nodes_in_chain[0].content | |
| last = nodes_in_chain[-1].content | |
| lang = kwargs.get("lang", "id") | |
| if lang == "id": | |
| templates = [ | |
| f"Dengan demikian, {first} memiliki kaitan erat dengan {last}.", | |
| f"Pada intinya, terdapat hubungan yang signifikan antara {first} dan {last}.", | |
| f"Secara keseluruhan, {first} dan {last} saling terhubung dalam konteks ini.", | |
| ] | |
| else: | |
| templates = [ | |
| f"In essence, {first} is closely connected to {last}.", | |
| f"Overall, there is a significant relationship between {first} and {last}.", | |
| f"To summarize, {first} and {last} are interconnected in this context.", | |
| ] | |
| return self._rng.choice(templates) | |
| def _build_suggestion(self, chains, query_analysis, vocab, **kwargs) -> str: | |
| """Build suggestion segment for uncertain responses.""" | |
| entities = query_analysis.get("entities", []) | |
| topic = ", ".join(entities[:2]) if entities else "topik ini" | |
| templates = vocab.get("uncertainty", {}).get("suggestion", []) | |
| if templates: | |
| template = self._rng.choice(templates) | |
| return template.replace("{topic}", topic) | |
| return "" | |
| def _build_uncertainty(self, chains, query_analysis, vocab, all_nodes, graph_stats, **kwargs) -> str: | |
| """Build honest uncertainty acknowledgment — NOT a template fallback.""" | |
| entities = query_analysis.get("entities", []) | |
| topic = ", ".join(entities[:2]) if entities else "topik tersebut" | |
| templates = vocab.get("uncertainty", {}).get("acknowledge", []) | |
| if templates: | |
| template = self._rng.choice(templates) | |
| return template.replace("{topic}", topic) + "." | |
| return "" | |
| # ─────────────────────────────────────────────────── | |
| # CHAIN → NATURAL LANGUAGE | |
| # ─────────────────────────────────────────────────── | |
| def _chain_to_natural_language( | |
| self, | |
| chain: ReasoningChain, | |
| all_nodes: dict, | |
| all_edges: dict, | |
| vocab: dict, | |
| confidence: float | |
| ) -> str: | |
| """ | |
| Convert a reasoning chain into a natural language sentence. | |
| This is the core compositional synthesis function. | |
| Path: [node_id, edge_id, node_id, edge_id, ...] | |
| Output: "NodeA [relation_verb] NodeB, yang [relation_verb] NodeC." | |
| """ | |
| path = chain.path | |
| if not path: | |
| return "" | |
| parts = [] | |
| prev_node = None | |
| sentence_count = 0 | |
| for i, item_id in enumerate(path): | |
| # ── Node ── | |
| node = all_nodes.get(item_id) | |
| if node: | |
| if prev_node is None: | |
| # First node: start of sentence | |
| parts.append(self._format_node_content(node)) | |
| prev_node = node | |
| else: | |
| # Subsequent node: connect via relation verb | |
| parts.append(self._format_node_content(node)) | |
| prev_node = node | |
| sentence_count += 1 | |
| continue | |
| # ── Edge ── | |
| edge = all_edges.get(item_id) | |
| if edge: | |
| verb = self._select_relation_verb(edge.relation, vocab) | |
| parts.append(f" {verb} ") | |
| continue | |
| if not parts: | |
| return "" | |
| text = "".join(parts).strip() | |
| # Clean up | |
| text = re.sub(r'\s+', ' ', text) | |
| if text and not text.endswith('.'): | |
| text += '.' | |
| return text | |
| def _continue_from_chain( | |
| self, | |
| chain: ReasoningChain, | |
| all_nodes: dict, | |
| all_edges: dict, | |
| vocab: dict, | |
| max_nodes: int = 3 | |
| ) -> str: | |
| """Extract a short continuation from a chain (for introductions).""" | |
| parts = [] | |
| node_count = 0 | |
| for item_id in chain.path: | |
| node = all_nodes.get(item_id) | |
| if node: | |
| node_count += 1 | |
| if node_count > 1: | |
| parts.append(node.content) | |
| if node_count >= max_nodes: | |
| break | |
| edge = all_edges.get(item_id) | |
| if edge and node_count >= 1: | |
| verb = self._select_relation_verb(edge.relation, vocab) | |
| parts.append(f" {verb} ") | |
| text = "".join(parts).strip() | |
| if text and not text.endswith('.'): | |
| text += '.' | |
| return text | |
| def _format_node_content(self, node: Node) -> str: | |
| """Format node content for inclusion in a sentence.""" | |
| content = node.content.strip() | |
| # Remove abstraction markers | |
| if content.startswith("[abstraction]") or content.startswith("[meta_abstraction]"): | |
| content = re.sub(r'^\[.*?\]\s*', '', content) | |
| # Truncate very long content | |
| if len(content) > 150: | |
| content = content[:147] + "..." | |
| return content | |
def _select_relation_verb(self, relation: str, vocab: dict) -> str:
    """Pick a surface verb for ``relation`` via weighted random choice.

    Falls back to the "related_to" pool, then to a hard-coded default,
    when the relation has no configured verbs.
    """
    pool = vocab.get("relation_verbs", {})
    fallback = pool.get("related_to", [("berkaitan dengan", 1.0)])
    options = pool.get(relation, fallback)
    surfaces = []
    probs = []
    for surface, prob in options:
        surfaces.append(surface)
        probs.append(prob)
    return utils.weighted_choice(surfaces, probs, 0.7)
| def _get_relation_verb(self, chains, all_edges, vocab, chain_index=0) -> str: | |
| """Get the first relation verb from a chain.""" | |
| if chain_index >= len(chains): | |
| return "" | |
| for item_id in chains[chain_index].path: | |
| edge = all_edges.get(item_id) | |
| if edge: | |
| return self._select_relation_verb(edge.relation, vocab) | |
| return "" | |
| # ─────────────────────────────────────────────────── | |
| # SEGMENT CONNECTION | |
| # ─────────────────────────────────────────────────── | |
def _connect_segments(
    self,
    segments: List[Tuple[str, str]],
    vocab: dict,
    personality: dict
) -> str:
    """Join (segment_type, text) pairs into one response string.

    A discourse connector — chosen by weighted random pick from the
    vocabulary, keyed on the transition between segment types — is placed
    before every segment after the first. Longer segments get a paragraph
    break once the response already has some body.

    Fix: empty segments previously still counted as "previous" segments,
    so a skipped-empty *first* segment made the next one open the response
    with a dangling connector (and connectors could be keyed off an empty
    segment's type). Connectors are now keyed off the last segment that
    was actually emitted.
    """
    if not segments:
        return ""
    result_parts = []
    prev_type = None  # type of the last segment actually emitted
    for seg_type, seg_text in segments:
        if not seg_text.strip():
            continue  # drop empty segments entirely
        if prev_type is None:
            # First non-empty segment opens the response with no connector.
            result_parts.append(seg_text)
            prev_type = seg_type
            continue
        connector_type = self._choose_connector_type(prev_type, seg_type)
        connectors = vocab.get("connectors", {}).get(
            connector_type,
            vocab.get("connectors", {}).get("neutral", [("", 1.0)])
        )
        connector = utils.weighted_choice(
            [c[0] for c in connectors],
            [c[1] for c in connectors],
            0.7,
        )
        # Paragraph break keeps long multi-segment answers readable.
        if len(result_parts) >= 2 and len(seg_text) > 50:
            result_parts.append(f"\n\n{connector}{seg_text}")
        elif connector:
            result_parts.append(f" {connector}{seg_text}")
        else:
            result_parts.append(f" {seg_text}")
        prev_type = seg_type
    return "".join(result_parts)
| def _choose_connector_type(self, prev_segment: str, curr_segment: str) -> str: | |
| """Choose appropriate connector type based on segment transition.""" | |
| transition_map = { | |
| ("introduction", "main_explanation"): "neutral", | |
| ("main_explanation", "supporting_detail"): "addition", | |
| ("main_explanation", "elaboration"): "elaboration", | |
| ("main_explanation", "example"): "example", | |
| ("main_explanation", "inference"): "cause", | |
| ("main_explanation", "comparison"): "contrast", | |
| ("supporting_detail", "conclusion"): "conclusion", | |
| ("supporting_detail", "elaboration"): "elaboration", | |
| ("elaboration", "conclusion"): "conclusion", | |
| ("inference", "conclusion"): "conclusion", | |
| ("example", "conclusion"): "conclusion", | |
| ("acknowledgment_of_uncertainty", "context"): "neutral", | |
| ("acknowledgment_of_uncertainty", "suggestion"): "neutral", | |
| ("context", "suggestion"): "neutral", | |
| ("main_explanation", "conclusion"): "conclusion", | |
| ("comparison", "conclusion"): "conclusion", | |
| } | |
| return transition_map.get( | |
| (prev_segment, curr_segment), "addition" | |
| ) | |
| # ─────────────────────────────────────────────────── | |
| # PERSONALITY APPLICATION | |
| # ─────────────────────────────────────────────────── | |
| def _apply_personality(self, text: str, personality: dict, lang: str) -> str: | |
| """Apply personality parameters to the generated text.""" | |
| if not text: | |
| return text | |
| formality = personality.get("formality", config.DEFAULT_FORMALITY) | |
| warmth = personality.get("tone_warmth", 0.5) | |
| use_emoji = personality.get("use_emoji", False) | |
| name = personality.get("name") | |
| # ── Formality adjustment ── | |
| if formality < 0.3: | |
| text = self._make_casual(text, lang) | |
| elif formality > 0.7: | |
| text = self._make_formal(text, lang) | |
| # ── Emoji injection ── | |
| if use_emoji: | |
| text = self._inject_emoji(text) | |
| return text | |
| def _make_casual(self, text: str, lang: str) -> str: | |
| """Make text more casual/informal.""" | |
| if lang == "id": | |
| replacements = { | |
| "merupakan": "itu", | |
| "memiliki": "punya", | |
| "tidak": "nggak", | |
| "Dengan demikian": "Jadi", | |
| "Secara keseluruhan": "Intinya", | |
| "Oleh karena itu": "Makanya", | |
| "Berdasarkan pemahaman saya": "Setahu aku", | |
| "Selain itu": "Terus", | |
| "Akan tetapi": "Tapi", | |
| "Meskipun demikian": "Meski gitu", | |
| "Lebih lanjut": "Terus juga", | |
| "Pada intinya": "Pokoknya", | |
| } | |
| else: | |
| replacements = { | |
| "Furthermore": "Also", | |
| "Additionally": "Plus", | |
| "However": "But", | |
| "Nevertheless": "Still", | |
| "Therefore": "So", | |
| "In essence": "Basically", | |
| "It is worth noting": "Also worth noting", | |
| } | |
| for formal, casual in replacements.items(): | |
| text = text.replace(formal, casual) | |
| text = text.replace(formal.lower(), casual.lower()) | |
| return text | |
| def _make_formal(self, text: str, lang: str) -> str: | |
| """Make text more formal/academic.""" | |
| if lang == "id": | |
| replacements = { | |
| "punya": "memiliki", | |
| "nggak": "tidak", | |
| "gak": "tidak", | |
| "banget": "sangat", | |
| "kayak": "seperti", | |
| "kek": "seperti", | |
| "emang": "memang", | |
| } | |
| else: | |
| replacements = { | |
| "don't": "do not", | |
| "can't": "cannot", | |
| "won't": "will not", | |
| "it's": "it is", | |
| } | |
| for informal, formal in replacements.items(): | |
| text = text.replace(informal, formal) | |
| return text | |
| def _inject_emoji(self, text: str) -> str: | |
| """Add relevant emoji to text.""" | |
| emoji_map = { | |
| "penting": " ⚡", "baik": " 👍", "menarik": " ✨", | |
| "perhatian": " 👀", "contoh": " 📝", "informasi": " ℹ️", | |
| "proses": " ⚙️", "data": " 📊", "belajar": " 📚", | |
| "hubungan": " 🔗", "important": " ⚡", "good": " 👍", | |
| "interesting": " ✨", "example": " 📝", "process": " ⚙️", | |
| } | |
| for keyword, emoji in emoji_map.items(): | |
| if keyword in text.lower() and emoji not in text: | |
| # Add emoji after first occurrence | |
| idx = text.lower().find(keyword) | |
| end = idx + len(keyword) | |
| # Find end of word | |
| while end < len(text) and text[end].isalpha(): | |
| end += 1 | |
| text = text[:end] + emoji + text[end:] | |
| break # Only one emoji injection | |
| return text | |
| # ─────────────────────────────────────────────────── | |
| # MARKDOWN FORMATTING | |
| # ─────────────────────────────────────────────────── | |
| def _format_markdown( | |
| self, | |
| text: str, | |
| segments: List[Tuple[str, str]], | |
| intent: str | |
| ) -> str: | |
| """Apply markdown formatting based on content structure.""" | |
| if not text: | |
| return text | |
| # Short responses don't need heavy formatting | |
| if len(text) < 200: | |
| return text | |
| # ── List formatting for list intent ── | |
| if intent == "list": | |
| text = self._format_list_items(text) | |
| # ── Bold key terms ── | |
| text = self._apply_bold_emphasis(text) | |
| # Clean up excessive whitespace | |
| text = re.sub(r'\n{3,}', '\n\n', text) | |
| text = re.sub(r' {2,}', ' ', text) | |
| return text | |
| def _format_list_items(self, text: str) -> str: | |
| """Convert comma-separated items into markdown list if appropriate.""" | |
| # Detect patterns like "A, B, C, dan D" | |
| list_pattern = re.search( | |
| r'(?:antara lain|yaitu|meliputi|berikut|including|such as)[:\s]+' | |
| r'(.+?)(?:\.|$)', | |
| text, re.IGNORECASE | |
| ) | |
| if list_pattern: | |
| items_text = list_pattern.group(1) | |
| # Split by comma or "dan"/"and" | |
| items = re.split(r',\s*|\s+dan\s+|\s+and\s+', items_text) | |
| items = [item.strip().rstrip('.') for item in items if item.strip()] | |
| if len(items) >= 3: | |
| bullet_list = "\n".join(f"- {item}" for item in items) | |
| prefix = text[:list_pattern.start(1)] | |
| suffix = text[list_pattern.end():] | |
| return f"{prefix}\n\n{bullet_list}\n\n{suffix}" | |
| return text | |
| def _apply_bold_emphasis(self, text: str) -> str: | |
| """Apply bold to key terms that appear as node content.""" | |
| # Bold proper nouns and important terms (capitalized multi-word) | |
| # Only bold a few to avoid over-formatting | |
| bold_count = 0 | |
| words = text.split() | |
| for i, word in enumerate(words): | |
| clean = re.sub(r'[^\w]', '', word) | |
| if (clean and clean[0].isupper() and len(clean) > 2 | |
| and i > 0 and bold_count < 3): | |
| # Check it's not start of sentence | |
| prev = words[i - 1] if i > 0 else "" | |
| if not prev.endswith('.') and not prev.endswith('\n'): | |
| words[i] = word.replace(clean, f"**{clean}**") | |
| bold_count += 1 | |
| return " ".join(words) | |
| # ─────────────────────────────────────────────────── | |
| # GREETING HANDLER | |
| # ─────────────────────────────────────────────────── | |
def _generate_greeting(
    self, personality: dict, vocab: dict, lang: str
) -> str:
    """Compose a greeting: weighted opening + optional self-intro + follow-up."""
    pool = vocab.get("openings", {}).get("greeting", [("Halo! ", 1.0)])
    greeting = utils.weighted_choice(
        [surface for surface, _ in pool],
        [weight for _, weight in pool],
        0.8,
    )
    if lang == "id":
        follow_ups = [
            "Ada yang bisa saya bantu?",
            "Apa yang ingin kamu ketahui?",
            "Silakan tanyakan apa saja yang ingin kamu ketahui.",
            "Saya siap membantu. Ada pertanyaan?",
            "Senang bisa membantu. Apa yang ingin dibahas?",
        ]
    else:
        follow_ups = [
            "How can I help you?",
            "What would you like to know?",
            "Feel free to ask me anything.",
            "I'm ready to help. What's on your mind?",
        ]
    follow_up = self._rng.choice(follow_ups)
    name = personality.get("name")
    if not name:
        return f"{greeting}{follow_up}"
    intro = f"Saya {name}. " if lang == "id" else f"I'm {name}. "
    return f"{greeting}{intro}{follow_up}"
| # ─────────────────────────────────────────────────── | |
| # KNOWLEDGE EXTRACTION RESPONSE HELPER | |
| # ─────────────────────────────────────────────────── | |
def generate_from_direct_nodes(
    self,
    nodes: "List[Node]",
    edges: "List[Edge]",
    query_analysis: dict,
    personality: dict,
    all_nodes: dict,
    lang: str = "id"
) -> str:
    """
    Generate a response directly from nodes and edges when no reasoning
    chains are available but nodes were found. Simpler than full
    chain-based generation: a weighted opening, then one sentence per
    node (linked to a neighbor via its first connecting edge when
    possible), finished by personality adaptation.
    """
    if not nodes:
        return ""
    vocab = VOCAB.get(lang, VOCAB["id"])
    # Re-seed so each response varies between calls.
    self._rng = utils.seeded_random(utils.variation_seed())
    entities = query_analysis.get("entities", [])
    subject = entities[0] if entities else nodes[0].content
    intent = query_analysis.get("intent", "general")
    opening_pool = vocab.get("openings", {}).get(intent, vocab["openings"]["general"])
    opening = utils.weighted_choice(
        [surface for surface, _ in opening_pool],
        [weight for _, weight in opening_pool],
        0.7,
    ).replace("{subject}", subject)
    sentences = [opening]
    seen_contents = set()
    for node in nodes[:5]:
        if node.content in seen_contents:
            continue
        seen_contents.add(node.content)
        body = node.content.strip()
        if len(body) > 200:
            body = body[:197] + "..."
        # First edge touching this node, if any.
        linked = next(
            (e for e in edges if node.id in (e.from_node, e.to_node)),
            None,
        )
        if linked is not None:
            verb = self._select_relation_verb(linked.relation, vocab)
            neighbor_id = (
                linked.to_node if linked.from_node == node.id else linked.from_node
            )
            neighbor = all_nodes.get(neighbor_id)
            if neighbor and neighbor.content not in seen_contents:
                sentences.append(f"{body} {verb} {neighbor.content}.")
                seen_contents.add(neighbor.content)
                continue
        sentences.append(f"{body}.")
    text = self._apply_personality(" ".join(sentences), personality, lang)
    return text.strip()