from whoosh.fields import TEXT, SchemaClass, ID
from jieba.analyse import ChineseAnalyzer
from whoosh.index import create_in
import json
import os

# Use jieba's analyzer so Chinese text is tokenized into words before indexing.
analyzer = ChineseAnalyzer()


class ArticleSchema(SchemaClass):
    index = ID(stored=True)
    原文 = TEXT(stored=True, analyzer=analyzer)
    注释 = TEXT(stored=True, analyzer=analyzer)
    批判 = TEXT(stored=True, analyzer=analyzer)
    章节 = TEXT(stored=True, analyzer=analyzer)


schema = ArticleSchema()
# create_in() requires the target directory to already exist.
os.makedirs("indexdir", exist_ok=True)
ix = create_in("indexdir", schema, indexname="article_index")
writer = ix.writer()
with open("反孔.json", encoding="utf-8") as json_file:
    raw_jsons = json.load(json_file)
# Each top-level item is a chapter: its first element is the chapter title,
# the remaining elements are entry dicts.
for chapter in raw_jsons:
    for entry in chapter[1:]:
        print(entry["index"])
        writer.add_document(
            index=entry["index"],
            原文=entry["原文"],
            注释=entry.get("注释", ""),
            批判=entry.get("批判", ""),
            章节=chapter[0],
        )
writer.commit()
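# Illustrative shape of 反孔.json, inferred from the loop above (the field
# names are the real ones; the sample values are invented):
# [
#     ["学而篇第一",
#      {"index": "1·1", "原文": "子曰:…", "注释": "…", "批判": "…"},
#      {"index": "1·2", "原文": "…"}],   # 注释/批判 may be absent
#     ...
# ]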
# --- Query app: reopen the index built above and serve it through Gradio ---
import gradio as gr
from whoosh.qparser import QueryParser
from whoosh.index import open_dir
import re
from whoosh.query import Term

ix = open_dir("indexdir", indexname="article_index")
# A single long-lived searcher is reused across requests.
searcher = ix.searcher()
def search(query_info):
    # Tokenize the query with the same analyzer and match it against 原文.
    query = QueryParser("原文", ix.schema).parse(query_info)
    results = searcher.search(query)
    map_hit = []
    for hit in results:
        entry = dict(hit)
        entry["extra"] = []
        # Commentary may cite other entries by number, e.g. "3·2" or "3.2";
        # look those up by their ID and attach them as related entries.
        批判文本 = hit.get("批判", "")
        matches = re.findall(r"\d+[·.]\d+", 批判文本)
        for index_ref in matches:
            index_ref_normalized = index_ref.replace(".", "·")
            term_query = Term("index", index_ref_normalized)
            related_results = searcher.search(term_query)
            for related_hit in related_results:
                entry["extra"].append(dict(related_hit))
        map_hit.append(entry)
    return map_hit
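# Illustrative behavior of the reference extraction above (the commentary
# string here is made up):
#   re.findall(r"\d+[·.]\d+", "参见3·2和7.11的批判")  ->  ['3·2', '7.11']
# After normalization "7.11" becomes "7·11", matching the stored "index" IDs.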
def lunyu_search(query):
    """
    Search for relevant critical commentary entries based on an input query
    from the Analects.

    This function parses the input query, performs a tokenized full-text
    search in the indexed original-text field (so fuzzy or partial quotes can
    still match), and extracts related critiques. If any numeric index
    references (e.g., '3·2') are found in the commentary, it further
    retrieves the related entries they point to.

    Args:
        query (str): The input text (a line from the Analects, possibly fuzzy
            or partial) to search.

    Returns:
        List[dict]: A list of result entries. Each entry contains the original
            hit and, under the key "extra", a list of related entries
            retrieved via index references mentioned in the commentary.
    """
    return search(query_info=query)
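# Example call (illustrative; the exact output depends on the indexed data;
# 季氏旅于泰山 is Analects 3·6):
#   lunyu_search("季氏旅于泰山")
#   -> [{"index": "3·6", "原文": "季氏旅于泰山。…", "注释": "…",
#        "批判": "…", "章节": "…", "extra": [...]}, ...]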
demo = gr.Interface(
    fn=lunyu_search,
    inputs=gr.Textbox(label="输入部分原文句子"),  # "enter part of an original sentence"
    outputs=gr.JSON(label="查询结果"),  # "query results"
    title="论语批判MCP",  # "Analects critique MCP"
    description="输入模糊的论语原文,可以查询到对应的批判内容。",
    # "Enter a rough quote from the Analects to look up the matching critique."
    examples=[
        ["季氏旅于泰山。"],
        ["子曰:学而时习之,不亦说乎?"],
        ["有朋自远方来,不亦乐乎?"],
        ["三人行,必有我师焉。"],
    ],
)
if __name__ == "__main__":
    # Quick smoke test before serving.
    res = search("季氏旅于泰山。")
    print(res)
    # mcp_server=True also exposes lunyu_search as an MCP tool.
    demo.launch(mcp_server=True)
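# Assumed dependencies (not pinned anywhere in the source):
#   pip install whoosh jieba "gradio[mcp]"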