Spaces:

QINGCHE
/

TSA

Sleeping

App Files Files Community

TSA / util.py

QINGCHE

unable baidu

02d932f over 2 years ago

raw

history blame contribute delete

2.97 kB

	import json
	import jieba
	import re
	import requests
	import backoff
	import time


	@backoff.on_exception(backoff.expo, requests.exceptions.RequestException)
	def post_url(url, headers, payload, timeout=30):
	    """POST ``payload`` to ``url`` and return the response.

	    Every attempt is preceded by a one-second pause.  Any
	    ``requests.exceptions.RequestException`` raised by the request triggers a
	    retry with exponential backoff; no retry limit is configured on the
	    decorator.

	    Args:
	        url: Target URL.
	        headers: Mapping of HTTP headers to send.
	        payload: Request body, passed to requests as ``data``.
	        timeout: Seconds to wait for the server before the attempt is
	            abandoned.  Without a timeout a stalled connection would block
	            forever and never raise, so the retry logic could not kick in.

	    Returns:
	        The ``requests.Response`` of the first attempt that did not raise.
	    """
	    # Pause before each attempt, retries included.
	    time.sleep(1)
	    return requests.post(url, headers=headers, data=payload, timeout=timeout)


	def seg(text):
	    """Split ``text`` into sentence-like pieces.

	    Line breaks are first turned into spaces.  The text is then cut after
	    every occurrence of one of ``。！？.!?:`` or a space; whitespace following
	    the cut point is consumed, while the delimiter itself stays attached to
	    the piece before it.  Empty pieces are discarded.

	    Args:
	        text: The string to split.

	    Returns:
	        A list of the non-empty pieces, in their original order.
	    """
	    flattened = text.replace('\n', " ")
	    pieces = re.split(r'(?<=[。！？.!?: ])\s*', flattened)
	    # filter(None, ...) keeps only the non-empty strings.
	    return list(filter(None, pieces))


	def clean_text(text):
	    """Reduce raw text to space-separated alphabetic words.

	    URLs, ``n/n/n`` style dates and ``hh:mm`` style times are removed, line
	    breaks are deleted, hyphens become spaces, and every character that is
	    neither alphabetic (per ``str.isalpha``) nor a space is dropped.  Words
	    consisting of a single character are discarded.

	    Args:
	        text: The raw string to clean.

	    Returns:
	        The surviving words joined by single spaces.
	    """
	    # URLs are removed first: the steps below strip line breaks, hyphens and
	    # digits, after which a URL could no longer be recognised.  The pattern
	    # is a plain Python regex (scheme "://" then ASCII URL characters); a
	    # JavaScript-style "/.../i" literal would be matched character for
	    # character and never hit a real URL.
	    text = re.sub(
	        r"[a-zA-Z][a-zA-Z0-9+.\-]*://[A-Za-z0-9\-._~:/?#@&=%+]+", "", text)
	    text = text.replace('\n', "")
	    text = text.replace("-", " ")
	    text = re.sub(r"\d+/\d+/\d+", "", text)  # dates
	    text = re.sub(r"[0-2]?[0-9]:[0-6][0-9]", "", text)  # times
	    pure_text = ''.join(
	        letter for letter in text if letter.isalpha() or letter == ' ')
	    return ' '.join(word for word in pure_text.split() if len(word) > 1)


	def article_to_group(groups, topics):
	    """Merge the content of entries that share the same group key.

	    Each entry of ``groups`` is indexed as ``entry[0]`` (content) and
	    ``entry[1]`` (group key).  Contents with the same key are combined with
	    ``+`` in the order they are encountered.

	    Args:
	        groups: Iterable of indexable entries as described above.
	        topics: Not used by this function.

	    Returns:
	        A dict mapping each group key to its combined content, with keys in
	        order of first appearance.
	    """
	    merged = {}
	    for entry in groups:
	        content, key = entry[0], entry[1]
	        try:
	            existing = merged[key]
	        except KeyError:
	            merged[key] = content
	        else:
	            # Build a new object instead of extending in place so the
	            # caller's first entry is never mutated.
	            merged[key] = existing + content
	    return merged


	def generation(para, max_length):
	    """Build the heading lookup and heading order for grouped text.

	    The remote summarisation request that previously produced a heading for
	    each group is disabled, so each group's own key serves as its heading.

	    Args:
	        para: Mapping of group key to that group's text.
	        max_length: Maximum summary length.  Unused while summarisation is
	            disabled; kept so existing callers keep working.

	    Returns:
	        A ``(topic, Ai_abstract)`` tuple.  ``topic`` maps every key of
	        ``para`` to ``(key, text)``; ``Ai_abstract`` lists the same keys in
	        iteration order.
	    """
	    # No access token is requested here: its only consumer was the disabled
	    # summary request, so fetching it merely added a blocking network call
	    # (and a failure mode) to every invocation.
	    # NOTE: if the remote call is ever restored, load the API credentials
	    # from configuration instead of embedding them in the source.
	    topic = {key: (key, content) for key, content in para.items()}
	    Ai_abstract = list(topic)
	    return topic, Ai_abstract
	def formate_text(title_dict, outline_list):
	    """Render an outline as a flat list of Markdown-style lines.

	    An outline entry that is a key of ``title_dict`` becomes a ``## `` heading
	    followed by element ``[1]`` of its ``title_dict`` value; any other entry
	    becomes a ``# `` heading on its own.

	    Args:
	        title_dict: Mapping from heading to an indexable value whose element
	            ``[1]`` is the text placed under that heading.
	        outline_list: Headings in the order they should appear.

	    Returns:
	        The list of generated lines.
	    """
	    lines = []
	    for heading in outline_list:
	        if heading in title_dict:
	            lines.extend((f"## {heading}", title_dict[heading][1]))
	        else:
	            lines.append(f"# {heading}")
	    return lines