Update app.py
Browse files
app.py
CHANGED
|
@@ -15,19 +15,6 @@ load_dotenv()
|
|
| 15 |
hf_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')
|
| 16 |
repo_id=os.environ.get('repo_id')
|
| 17 |
|
| 18 |
-
print(f"定义处理多余的Context文本的函数")
|
| 19 |
-
def remove_context(text):
|
| 20 |
-
# 检查 'Context:' 是否存在
|
| 21 |
-
if 'Context:' in text:
|
| 22 |
-
# 找到第一个 '\n\n' 的位置
|
| 23 |
-
end_of_context = text.find('\n\n')
|
| 24 |
-
# 删除 'Context:' 到第一个 '\n\n' 之间的部分
|
| 25 |
-
return text[end_of_context + 2:] # '+2' 是为了跳过两个换行符
|
| 26 |
-
else:
|
| 27 |
-
# 如果 'Context:' 不存在,返回原始文本
|
| 28 |
-
return text
|
| 29 |
-
print(f"处理多余的Context文本函数定义结束")
|
| 30 |
-
|
| 31 |
#port = os.getenv('port')
|
| 32 |
|
| 33 |
#OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
|
|
@@ -41,6 +28,7 @@ llm = HuggingFaceHub(repo_id=repo_id, # for StarChat
|
|
| 41 |
"top_p": 0.95, "eos_token_id": 49155})
|
| 42 |
|
| 43 |
#chain = load_summarize_chain(llm, chain_type="stuff") #stuff模式容易导致出错:估计是超LLM的token限制所致
|
|
|
|
| 44 |
chain = load_summarize_chain(llm, chain_type="refine")
|
| 45 |
|
| 46 |
text_splitter_rcs = RecursiveCharacterTextSplitter(
|
|
@@ -58,25 +46,28 @@ if url !="" and not url.strip().isspace() and not url == "" and not url.strip()
|
|
| 58 |
try:
|
| 59 |
#loader = WebBaseLoader("https://www.usinoip.com/")
|
| 60 |
with st.spinner("AI Thinking...Please wait a while to Cheers!"):
|
| 61 |
-
print(
|
| 62 |
loader = WebBaseLoader(url)
|
|
|
|
| 63 |
docs = loader.load()
|
|
|
|
| 64 |
split_docs = text_splitter_rcs.split_documents(docs)
|
|
|
|
|
|
|
| 65 |
#result=chain.run(docs) #这个result的格式比较特殊,可以直接print,但不可以和其他字符串联合print输出 - this step errors!
|
| 66 |
result=chain.run(split_docs)
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
final_ai_response = cleaned_initial_ai_response.split('<|end|>')[0].strip().replace('\n\n', '\n').replace('<|end|>', '').replace('<|user|>', '').replace('<|system|>', '').replace('<|assistant|>', '')
|
| 72 |
-
new_final_ai_response = final_ai_response.split('Unhelpful Answer:')[0].strip()
|
| 73 |
-
final_result = new_final_ai_response.split('Note:')[0].strip()
|
| 74 |
#print("AI Summarization: "+result) #这个会出错,原因见上方
|
| 75 |
print("AI Summarization:")
|
| 76 |
#print(result)
|
| 77 |
-
print(
|
|
|
|
| 78 |
st.write("AI Summarization:")
|
| 79 |
#st.write(result)
|
| 80 |
-
st.write(
|
|
|
|
| 81 |
except Exception as e:
|
| 82 |
st.write("Wrong URL or URL not parsable.")
|
|
|
|
| 15 |
hf_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')
|
| 16 |
repo_id=os.environ.get('repo_id')
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
#port = os.getenv('port')
|
| 19 |
|
| 20 |
#OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
|
|
|
|
| 28 |
"top_p": 0.95, "eos_token_id": 49155})
|
| 29 |
|
| 30 |
#chain = load_summarize_chain(llm, chain_type="stuff") #stuff模式容易导致出错:估计是超LLM的token限制所致
|
| 31 |
+
|
| 32 |
chain = load_summarize_chain(llm, chain_type="refine")
|
| 33 |
|
| 34 |
text_splitter_rcs = RecursiveCharacterTextSplitter(
|
|
|
|
| 46 |
try:
|
| 47 |
#loader = WebBaseLoader("https://www.usinoip.com/")
|
| 48 |
with st.spinner("AI Thinking...Please wait a while to Cheers!"):
|
| 49 |
+
print("1")
|
| 50 |
loader = WebBaseLoader(url)
|
| 51 |
+
print("2")
|
| 52 |
docs = loader.load()
|
| 53 |
+
print(docs)
|
| 54 |
split_docs = text_splitter_rcs.split_documents(docs)
|
| 55 |
+
print("3")
|
| 56 |
+
print(split_docs)
|
| 57 |
#result=chain.run(docs) #这个result的格式比较特殊,可以直接print,但不可以和其他字符串联合print输出 - this step errors!
|
| 58 |
result=chain.run(split_docs)
|
| 59 |
+
print("4")
|
| 60 |
+
print(url)
|
| 61 |
+
ai_response=str(result)
|
| 62 |
+
|
|
|
|
|
|
|
|
|
|
| 63 |
#print("AI Summarization: "+result) #这个会出错,原因见上方
|
| 64 |
print("AI Summarization:")
|
| 65 |
#print(result)
|
| 66 |
+
print(ai_response)
|
| 67 |
+
|
| 68 |
st.write("AI Summarization:")
|
| 69 |
#st.write(result)
|
| 70 |
+
st.write(ai_response)
|
| 71 |
+
|
| 72 |
except Exception as e:
|
| 73 |
st.write("Wrong URL or URL not parsable.")
|