Buckets:

Tsukihjy
/

testcase

about 1 month ago

973 Bytes

	import re

	def contains_link_or_image(text):
	    """Return True if *text* contains a web link or an image file name.

	    Three kinds of substrings are recognised:

	    * ``http://`` / ``https://`` URLs,
	    * bare host names starting with ``www.``,
	    * whitespace-free tokens containing ``.jpg``, ``.jpeg``, ``.png`` or
	      ``.gif`` (matching is case-sensitive).

	    Args:
	        text: The string to inspect.

	    Returns:
	        bool: True when at least one of the patterns above occurs in *text*.
	    """
	    # NOTE: alternation in Python's ``re`` is a bare ``|``. The escaped form
	    # ``\|`` (grep basic-regex syntax) matches a literal pipe character, which
	    # made the previous pattern miss every ordinary link.
	    url_pattern = r'https?://\S+|www\.\S+|\S+\.(?:jpg|jpeg|png|gif)'

	    return re.search(url_pattern, text) is not None

	def contains_html_tags(text):
	    """Return True if *text* contains something shaped like an HTML tag.

	    A "tag" here is a ``<``, followed by one or more characters other than
	    ``>``, followed by ``>``. No HTML parsing is performed, so text such as
	    ``a < b > c`` also counts.

	    Args:
	        text: The string to inspect.

	    Returns:
	        bool: True when such a ``<...>`` span occurs anywhere in *text*.
	    """
	    tag_match = re.search(r'<[^>]+>', text)
	    return tag_match is not None

	import json


	def _load_records(path):
	    """Load the records stored at *path* as a list.

	    The file is first parsed as a single JSON document, which is what the
	    script did originally. If that fails, it is parsed as JSON Lines (one
	    JSON value per non-blank line), because ``json.load`` raises
	    ``JSONDecodeError`` ("Extra data") on a multi-record ``.jsonl`` file.

	    Args:
	        path: Path of a UTF-8 encoded JSON or JSON Lines file.

	    Returns:
	        list: The decoded records. A file holding a single JSON object is
	        returned as a one-element list.

	    Raises:
	        OSError: If the file cannot be opened or read.
	        json.JSONDecodeError: If the content is neither valid JSON nor
	            valid JSON Lines.
	    """
	    # The context manager closes the file handle, which the bare
	    # ``json.load(open(...))`` call never did.
	    with open(path, "r", encoding="utf-8") as fh:
	        raw = fh.read()

	    try:
	        data = json.loads(raw)
	    except json.JSONDecodeError:
	        return [json.loads(line) for line in raw.splitlines() if line.strip()]

	    # A one-line JSONL file decodes as a single object; wrap it so callers
	    # always iterate over records rather than over the object's keys.
	    return [data] if isinstance(data, dict) else data


	add_data = _load_records("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/tcb_v7_add_data_no_en.jsonl")

	# IDs of every record whose 'query' text contains an HTML tag, a link or an
	# image file name.
	check_list = [
	    item['tcb_id']
	    for item in add_data
	    if contains_html_tags(item['query']) or contains_link_or_image(item['query'])
	]

	print("包含 HTML 标签")
	# str() keeps the join from raising TypeError if the IDs are not strings
	# (the type of 'tcb_id' is not visible from this script).
	print("\n".join(str(tcb_id) for tcb_id in check_list))

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.