Upload backend/hue_portal/chatbot/structured_legal.py with huggingface_hub
Browse files
backend/hue_portal/chatbot/structured_legal.py
CHANGED
|
@@ -68,8 +68,8 @@ def build_structured_legal_prompt(
|
|
| 68 |
"""Construct prompt instructing the LLM to return structured JSON."""
|
| 69 |
|
| 70 |
doc_blocks = []
|
| 71 |
-
#
|
| 72 |
-
for idx, doc in enumerate(documents[:
|
| 73 |
document = getattr(doc, "document", None)
|
| 74 |
title = getattr(document, "title", "") or "Không rõ tên văn bản"
|
| 75 |
code = getattr(document, "code", "") or "N/A"
|
|
@@ -77,7 +77,7 @@ def build_structured_legal_prompt(
|
|
| 77 |
section_title = getattr(doc, "section_title", "") or ""
|
| 78 |
page_range = _format_page_range(doc)
|
| 79 |
content = getattr(doc, "content", "") or ""
|
| 80 |
-
#
|
| 81 |
snippet = (content[:500] + "...") if len(content) > 500 else content
|
| 82 |
|
| 83 |
block = textwrap.dedent(
|
|
@@ -95,8 +95,8 @@ def build_structured_legal_prompt(
|
|
| 95 |
docs_text = "\n\n".join(doc_blocks)
|
| 96 |
reference_lines = []
|
| 97 |
title_section_pairs = []
|
| 98 |
-
#
|
| 99 |
-
for doc in documents[:
|
| 100 |
document = getattr(doc, "document", None)
|
| 101 |
title = getattr(document, "title", "") or "Không rõ tên văn bản"
|
| 102 |
section_code = getattr(doc, "section_code", "") or "Không rõ điều"
|
|
|
|
| 68 |
"""Construct prompt instructing the LLM to return structured JSON."""
|
| 69 |
|
| 70 |
doc_blocks = []
|
| 71 |
+
# 4 chunks for good context and speed balance
|
| 72 |
+
for idx, doc in enumerate(documents[:4], 1):
|
| 73 |
document = getattr(doc, "document", None)
|
| 74 |
title = getattr(document, "title", "") or "Không rõ tên văn bản"
|
| 75 |
code = getattr(document, "code", "") or "N/A"
|
|
|
|
| 77 |
section_title = getattr(doc, "section_title", "") or ""
|
| 78 |
page_range = _format_page_range(doc)
|
| 79 |
content = getattr(doc, "content", "") or ""
|
| 80 |
+
# Increased snippet to 500 chars to use more RAM and provide better context
|
| 81 |
snippet = (content[:500] + "...") if len(content) > 500 else content
|
| 82 |
|
| 83 |
block = textwrap.dedent(
|
|
|
|
| 95 |
docs_text = "\n\n".join(doc_blocks)
|
| 96 |
reference_lines = []
|
| 97 |
title_section_pairs = []
|
| 98 |
+
# 4 chunks to match doc_blocks for balance
|
| 99 |
+
for doc in documents[:4]:
|
| 100 |
document = getattr(doc, "document", None)
|
| 101 |
title = getattr(document, "title", "") or "Không rõ tên văn bản"
|
| 102 |
section_code = getattr(doc, "section_code", "") or "Không rõ điều"
|