Spaces:

iShare
/

Langchain-Summarization-Chain

Sleeping

App Files Files Community

iShare committed on Dec 7, 2023

Commit

951a4ca

1 Parent(s): 338c8af

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -23

app.py CHANGED Viewed

@@ -15,19 +15,6 @@ load_dotenv()
 hf_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')
 repo_id=os.environ.get('repo_id')
-print(f"定义处理多余的Context文本的函数")
-def remove_context(text):
-    # 检查 'Context:' 是否存在
-    if 'Context:' in text:
-        # 找到第一个 '\n\n' 的位置
-        end_of_context = text.find('\n\n')
-        # 删除 'Context:' 到第一个 '\n\n' 之间的部分
-        return text[end_of_context + 2:]  # '+2' 是为了跳过两个换行符
-    else:
-        # 如果 'Context:' 不存在，返回原始文本
-        return text
-print(f"处理多余的Context文本函数定义结束")
 #port = os.getenv('port')
 #OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
@@ -41,6 +28,7 @@ llm = HuggingFaceHub(repo_id=repo_id,  # for StarChat
                                    "top_p": 0.95, "eos_token_id": 49155})
 #chain = load_summarize_chain(llm, chain_type="stuff")    #stuff模式容易导致出错：估计是超LLM的token限制所致
 chain = load_summarize_chain(llm, chain_type="refine")
 text_splitter_rcs = RecursiveCharacterTextSplitter(
@@ -58,25 +46,28 @@ if url !="" and not url.strip().isspace() and not url == "" and not url.strip()
     try:
         #loader = WebBaseLoader("https://www.usinoip.com/")
         with st.spinner("AI Thinking...Please wait a while to Cheers!"):
-            print(url)
             loader = WebBaseLoader(url)
             docs = loader.load()
             split_docs = text_splitter_rcs.split_documents(docs)
             #result=chain.run(docs)   #这个result的格式比较特殊，可以直接print，但不可以和其他字符串联合print输出 - this step errors!
             result=chain.run(split_docs)
-            initial_ai_response=str(result) #找到之前总是POST Error的原因：chain.run(docs)的结果，格式不是str，导致程序错误
-            cleaned_initial_ai_response = remove_context(initial_ai_response)
-            print(cleaned_initial_ai_response)
-            print()
-            final_ai_response = cleaned_initial_ai_response.split('<|end|>')[0].strip().replace('\n\n', '\n').replace('<|end|>', '').replace('<|user|>', '').replace('<|system|>', '').replace('<|assistant|>', '')
-            new_final_ai_response = final_ai_response.split('Unhelpful Answer:')[0].strip()
-            final_result = new_final_ai_response.split('Note:')[0].strip()
             #print("AI Summarization: "+result)   #这个会出错，原因见上方
             print("AI Summarization:")
             #print(result)
-            print(final_result)
             st.write("AI Summarization:")
             #st.write(result)
-            st.write(final_result)
     except Exception as e:
         st.write("Wrong URL or URL not parsable.")

 hf_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')
 repo_id=os.environ.get('repo_id')
 #port = os.getenv('port')
 #OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
                                    "top_p": 0.95, "eos_token_id": 49155})
 #chain = load_summarize_chain(llm, chain_type="stuff")    #stuff模式容易导致出错：估计是超LLM的token限制所致
 chain = load_summarize_chain(llm, chain_type="refine")
 text_splitter_rcs = RecursiveCharacterTextSplitter(
     try:
         #loader = WebBaseLoader("https://www.usinoip.com/")
         with st.spinner("AI Thinking...Please wait a while to Cheers!"):
+            print("1")
             loader = WebBaseLoader(url)
+            print("2")
             docs = loader.load()
+            print(docs)
             split_docs = text_splitter_rcs.split_documents(docs)
+            print("3")
+            print(split_docs)
             #result=chain.run(docs)   #这个result的格式比较特殊，可以直接print，但不可以和其他字符串联合print输出 - this step errors!
             result=chain.run(split_docs)
+            print("4")
+            print(url)
+            ai_response=str(result)
             #print("AI Summarization: "+result)   #这个会出错，原因见上方
             print("AI Summarization:")
             #print(result)
+            print(ai_response)
             st.write("AI Summarization:")
             #st.write(result)
+            st.write(ai_response)
     except Exception as e:
         st.write("Wrong URL or URL not parsable.")