Spaces:
Runtime error
Runtime error
| # %% | |
| import textwrap | |
| from benedict import benedict | |
| from langchain_core.documents import Document | |
| def _fixed_width_wrap(text, width: int = 70, join_str: str = "\n"): | |
| return join_str.join(textwrap.wrap(text, width=width)) | |
| def fixed_width_wrap(text, width: int = 70, join_str: str = "\n", split_str="\n"): | |
| return join_str.join( | |
| [ | |
| _fixed_width_wrap(t, width=width, join_str=join_str) | |
| for t in text.split(split_str) | |
| ] | |
| ) | |
| def format_doc_minimal(d, fixed_width=False): | |
| if isinstance(d, Document): | |
| _cont = d.page_content | |
| _meta = benedict(d.metadata) | |
| else: | |
| _cont = d["page_content"] | |
| _meta = benedict(d["metadata"]) | |
| if fixed_width: | |
| _cont = _fixed_width_wrap(_cont) | |
| return """\ | |
| Title:\t{title} | |
| Published on:\t{pubdate} | |
| Source:\t{source_name} ({source_country}) | |
| Chunk Content: | |
| \t{cont} | |
| """.format( | |
| d=d, | |
| title=_meta.get("title"), | |
| pubdate=_meta.get("pubdate"), | |
| source_name=_meta.get("source.host") or _meta.get("source.id"), | |
| source_country=_meta.get("source.country", "n/a"), | |
| cont=_cont, | |
| ) | |
| def format_docs(docs, doc_fn=format_doc_minimal, **kwargs): | |
| return "\n---\n".join([doc_fn(d, **kwargs) for d in docs]) | |