Spaces:
Sleeping
Sleeping
guangliang.yin committed on
Commit ·
b0c2444
1
Parent(s): 11b895a
文章id优化
Browse files
app.py
CHANGED
|
@@ -20,10 +20,24 @@ from langchain.chains.llm import LLMChain
|
|
| 20 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 21 |
from langchain.chains import StuffDocumentsChain
|
| 22 |
from langchain_core.prompts import PromptTemplate
|
|
|
|
| 23 |
|
| 24 |
chain: Optional[Callable] = None
|
| 25 |
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
def web_loader(file, openai_key, puzhiai_key, zilliz_uri, user, password):
|
| 28 |
if not file:
|
| 29 |
return "please upload file"
|
|
@@ -38,11 +52,17 @@ def web_loader(file, openai_key, puzhiai_key, zilliz_uri, user, password):
|
|
| 38 |
return "embeddings not"
|
| 39 |
|
| 40 |
texts = [d.page_content for d in docs]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
docsearch = Zilliz.from_documents(
|
| 43 |
docs,
|
| 44 |
embedding=embeddings,
|
| 45 |
-
ids=
|
| 46 |
connection_args={
|
| 47 |
"uri": zilliz_uri,
|
| 48 |
"user": user,
|
|
|
|
| 20 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 21 |
from langchain.chains import StuffDocumentsChain
|
| 22 |
from langchain_core.prompts import PromptTemplate
|
| 23 |
+
import hashlib
|
| 24 |
|
| 25 |
chain: Optional[Callable] = None
|
| 26 |
|
| 27 |
|
| 28 |
+
def generate_article_id(content):
    """Return the SHA-256 hex digest of ``content`` for use as an article ID.

    The ID depends only on the content, so identical content always yields
    the same ID.

    Args:
        content: Article text as ``str`` (hashed as its UTF-8 encoding), or
            bytes-like data (``bytes``, ``bytearray``, ``memoryview``),
            which is hashed as-is.

    Returns:
        The digest as a 64-character lowercase hexadecimal string.

    Raises:
        AttributeError: If ``content`` is neither bytes-like nor an object
            with an ``encode`` method (for example ``None``).
    """
    if isinstance(content, (bytes, bytearray, memoryview)):
        # Already raw bytes: hash directly instead of failing on .encode().
        data = content
    else:
        # Text is hashed via its UTF-8 encoding, as before.
        data = content.encode('utf-8')
    return hashlib.sha256(data).hexdigest()
|
| 39 |
+
|
| 40 |
+
|
| 41 |
def web_loader(file, openai_key, puzhiai_key, zilliz_uri, user, password):
|
| 42 |
if not file:
|
| 43 |
return "please upload file"
|
|
|
|
| 52 |
return "embeddings not"
|
| 53 |
|
| 54 |
texts = [d.page_content for d in docs]
|
| 55 |
+
article_ids = []
|
| 56 |
+
# 遍历texts列表
|
| 57 |
+
for text in texts:
|
| 58 |
+
# 使用generate_article_id函数生成文章ID,并将其添加到article_ids列表中
|
| 59 |
+
article_id = generate_article_id(text)
|
| 60 |
+
article_ids.append(article_id)
|
| 61 |
|
| 62 |
docsearch = Zilliz.from_documents(
|
| 63 |
docs,
|
| 64 |
embedding=embeddings,
|
| 65 |
+
ids=article_ids,
|
| 66 |
connection_args={
|
| 67 |
"uri": zilliz_uri,
|
| 68 |
"user": user,
|