Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| # import llama_cpp | |
| import base64 | |
| from Crypto.Cipher import AES | |
| from Crypto.Util.Padding import unpad | |
def decrypt_file(input_path: str, key: str) -> str:
    """Decrypt an AES-256-CBC encrypted, base64-encoded file.

    Expected file layout: base64( IV (16 bytes) || ciphertext ), with
    PKCS#7 padding inside the ciphertext.

    Args:
        input_path: Path to the base64-encoded encrypted file.
        key: Passphrase; UTF-8 encoded, then right-padded with b"0" /
            truncated to exactly 32 bytes (AES-256).

    Returns:
        The decrypted plaintext decoded as UTF-8.

    Raises:
        ValueError: If padding is invalid (wrong key or corrupt data).
    """
    # Read and un-base64 the whole encrypted payload.
    with open(input_path, "rb") as f:
        encrypted_data = base64.b64decode(f.read())
    # Pad/truncate in *bytes*, not characters: slicing to 32 characters
    # before encoding could yield more than 32 bytes for a non-ASCII
    # passphrase, which AES.new() would reject.
    key_bytes = key.encode("utf-8").ljust(32, b"0")[:32]
    iv = encrypted_data[:16]
    ciphertext = encrypted_data[16:]
    cipher = AES.new(key_bytes, AES.MODE_CBC, iv)
    plaintext = unpad(cipher.decrypt(ciphertext), AES.block_size)
    return plaintext.decode("utf-8")
| # llm = llama_cpp.Llama.from_pretrained( | |
| # repo_id="mradermacher/bge-large-zh-v1.5-GGUF", | |
| # filename="bge-large-zh-v1.5.Q4_K_M.gguf", | |
| # embedding=True, | |
| # ) | |
| # embedding_1 = llm.create_embedding("Hello, world!") | |
| # embedding_2 = llm.create_embedding("你好, 世界!") # type(embedding_1['data'][0]['embedding']) list | |
from openai import OpenAI
import os
import json

# OpenAI-compatible client pointed at Alibaba DashScope (Bailian).
client_oai = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",  # DashScope base_url
)

from pymilvus import MilvusClient

# Local Milvus Lite database; dimension 1024 matches text-embedding-v3.
client = MilvusClient("./books.db")
client.create_collection(collection_name="collection_1", dimension=1024)

# Fail fast with a clear message when the AES key env var is absent,
# instead of an opaque AttributeError inside decrypt_file().
aeskey = os.getenv("aeskey")
if aeskey is None:
    raise RuntimeError("environment variable 'aeskey' is not set")
decrypted_content = decrypt_file("encrypted.txt", aeskey)
raw_jsons = json.loads(decrypted_content)

# Pre-computed embeddings, grouped per chapter in the same order as
# the decrypted corpus.
with open("embeddings.json", mode="r", encoding="utf-8") as embedding_file:
    all_embs = json.load(embedding_file)
# Ingest chapter by chapter. raw_jsons is a list of chapters, each of the
# form [chapter_title, entry, entry, ...]; all_embs holds one embedding
# vector per entry, grouped per chapter in the same order.
for chapter_idx, chapter_data in enumerate(raw_jsons):
    chapter = chapter_data[0]
    metas = []
    for entry in chapter_data[1:]:
        content = entry["原文"]
        metas.append(
            {
                "index": entry["index"],
                "text": content,
                "annotation": entry.get("注释", ""),
                "critique": entry.get("批判", ""),
                "chapter": chapter,
            }
        )
    # One chapter per batch; embeddings were produced in entry order.
    embeddings = all_embs[chapter_idx]  # List[List[float]]
    print(len(embeddings))
    # A count mismatch would otherwise drop entries silently (or raise
    # IndexError mid-insert) — surface it explicitly.
    if len(embeddings) != len(metas):
        raise ValueError(
            f"chapter {chapter_idx}: {len(embeddings)} embeddings "
            f"for {len(metas)} entries"
        )
    milvus_data = []
    for i, (emb, item) in enumerate(zip(embeddings, metas)):
        milvus_data.append(
            {
                # NOTE(review): id scheme assumes < 100 entries per
                # chapter; ids would collide otherwise — verify corpus.
                "id": chapter_idx * 100 + i,
                "index": item["index"],
                "vector": emb,
                "text": item["text"],
                "annotation": item["annotation"],
                "critique": item["critique"],
                "chapter": item["chapter"],
            }
        )
    print(f"✅ 共 {len(milvus_data)} 条数据")
    client.insert(collection_name="collection_1", data=milvus_data)
    print(f"✅ 插入完成:共 {len(milvus_data)} 条数据")
def greet(name):
    """Vector-search the critique collection for an Analects passage.

    Embeds the input with DashScope's text-embedding-v3 model (1024
    dimensions, matching the collection) and runs a similarity search
    against Milvus, so the query may be fuzzy or partial.

    Args:
        name (str): A line (possibly partial/fuzzy) from the Analects.

    Returns:
        The raw Milvus search result: for the single query vector, a
        list of up to 5 hits, each carrying the output fields "index",
        "text", "annotation" and "critique".
    """
    completion = client_oai.embeddings.create(
        model="text-embedding-v3",
        input=name,
        dimensions=1024,  # only text-embedding-v3 supports `dimensions`
        encoding_format="float",
    )
    res = client.search(
        collection_name="collection_1",
        data=[completion.data[0].embedding],
        limit=5,
        output_fields=["index", "text", "annotation", "critique"],
    )
    return res
# Gradio UI: one textbox in, JSON search results out.
# launch(mcp_server=True) additionally exposes greet() as an MCP tool.
interface_config = dict(
    fn=greet,
    inputs=gr.Textbox(label="输入部分原文句子"),
    outputs=gr.JSON(label="查询结果"),
    title="论语批判MCP (Embedding版本)",
    description="输入模糊的论语原文,可以向量检索到对应的批判内容。",
)
demo = gr.Interface(**interface_config)
demo.launch(mcp_server=True)