from whoosh.fields import TEXT, SchemaClass, ID
from jieba.analyse import ChineseAnalyzer
from whoosh.index import create_in
import json
import os

# Use jieba's analyzer so Chinese text is tokenized into words before indexing.
analyzer = ChineseAnalyzer()


class ArticleSchema(SchemaClass):
    index = ID(stored=True)
    原文 = TEXT(stored=True, analyzer=analyzer)
    注释 = TEXT(stored=True, analyzer=analyzer)
    批判 = TEXT(stored=True, analyzer=analyzer)
    章节 = TEXT(stored=True, analyzer=analyzer)


schema = ArticleSchema()
# create_in() requires the target directory to already exist.
os.makedirs("indexdir", exist_ok=True)
ix = create_in("indexdir", schema, indexname="article_index")
writer = ix.writer()
with open("反孔.json", encoding="utf-8") as json_file:
    raw_jsons = json.load(json_file)
# Each top-level item is a chapter: its first element is the chapter title,
# the remaining elements are entry dicts.
for chapter in raw_jsons:
    for entry in chapter[1:]:
        print(entry["index"])
        writer.add_document(
            index=entry["index"],
            原文=entry["原文"],
            注释=entry.get("注释", ""),
            批判=entry.get("批判", ""),
            章节=chapter[0],
        )
writer.commit()
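# Illustrative shape of 反孔.json, inferred from the loop above (the field
# names are the real ones; the sample values are invented):
# [
#     ["学而篇第一",
#      {"index": "1·1", "原文": "子曰:…", "注释": "…", "批判": "…"},
#      {"index": "1·2", "原文": "…"}],   # 注释/批判 may be absent
#     ...
# ]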
# --- Query app: reopen the index built above and serve it through Gradio ---
import gradio as gr
from whoosh.qparser import QueryParser
from whoosh.index import open_dir
import re
from whoosh.query import Term

ix = open_dir("indexdir", indexname="article_index")
# A single long-lived searcher is reused across requests.
searcher = ix.searcher()
def search(query_info):
    # Tokenize the query with the same analyzer and match it against 原文.
    query = QueryParser("原文", ix.schema).parse(query_info)
    results = searcher.search(query)
    map_hit = []
    for hit in results:
        entry = dict(hit)
        entry["extra"] = []
        # Commentary may cite other entries by number, e.g. "3·2" or "3.2";
        # look those up by their ID and attach them as related entries.
        批判文本 = hit.get("批判", "")
        matches = re.findall(r"\d+[·.]\d+", 批判文本)
        for index_ref in matches:
            index_ref_normalized = index_ref.replace(".", "·")
            term_query = Term("index", index_ref_normalized)
            related_results = searcher.search(term_query)
            for related_hit in related_results:
                entry["extra"].append(dict(related_hit))
        map_hit.append(entry)
    return map_hit
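# Illustrative behavior of the reference extraction above (the commentary
# string here is made up):
#   re.findall(r"\d+[·.]\d+", "参见3·2和7.11的批判")  ->  ['3·2', '7.11']
# After normalization "7.11" becomes "7·11", matching the stored "index" IDs.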
def lunyu_search(query):
    """
    Search for relevant critical commentary entries based on an input query
    from the Analects.

    This function parses the input query, performs a tokenized full-text
    search in the indexed original-text field (so fuzzy or partial quotes can
    still match), and extracts related critiques. If any numeric index
    references (e.g., '3·2') are found in the commentary, it further
    retrieves the related entries they point to.

    Args:
        query (str): The input text (a line from the Analects, possibly fuzzy
            or partial) to search.

    Returns:
        List[dict]: A list of result entries. Each entry contains the original
            hit and, under the key "extra", a list of related entries
            retrieved via index references mentioned in the commentary.
    """
    return search(query_info=query)
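# Example call (illustrative; the exact output depends on the indexed data;
# 季氏旅于泰山 is Analects 3·6):
#   lunyu_search("季氏旅于泰山")
#   -> [{"index": "3·6", "原文": "季氏旅于泰山。…", "注释": "…",
#        "批判": "…", "章节": "…", "extra": [...]}, ...]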
demo = gr.Interface(
    fn=lunyu_search,
    inputs=gr.Textbox(label="输入部分原文句子"),  # "enter part of an original sentence"
    outputs=gr.JSON(label="查询结果"),  # "query results"
    title="论语批判MCP",  # "Analects critique MCP"
    description="输入模糊的论语原文,可以查询到对应的批判内容。",
    # "Enter a rough quote from the Analects to look up the matching critique."
    examples=[
        ["季氏旅于泰山。"],
        ["子曰:学而时习之,不亦说乎?"],
        ["有朋自远方来,不亦乐乎?"],
        ["三人行,必有我师焉。"],
    ],
)
if __name__ == "__main__":
    # Quick smoke test before serving.
    res = search("季氏旅于泰山。")
    print(res)
    # mcp_server=True also exposes lunyu_search as an MCP tool.
    demo.launch(mcp_server=True)
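# Assumed dependencies (not pinned anywhere in the source):
#   pip install whoosh jieba "gradio[mcp]"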