File size: 2,183 Bytes
77320e4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import os
import random
import re
import string
from urllib.parse import urlparse
def is_image_link(url):
    """Return True when *url* points at a file with a known image extension.

    The comparison is case-insensitive, and a trailing query string or
    fragment (``?sig=...``, ``#anchor``) is ignored, so links such as
    ``https://host/plot.PNG`` or ``https://host/plot.png?sig=abc`` are
    recognised as images.

    Args:
        url: URL or path string to inspect.

    Returns:
        bool: True if the URL, with or without its query/fragment, ends in
        one of the supported image extensions; False otherwise.
    """
    # str.endswith accepts a tuple, so one call covers every extension.
    image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp')
    lowered = url.lower()
    # Drop the fragment first, then the query string.
    without_query = lowered.split('#', 1)[0].split('?', 1)[0]
    # The raw URL is still checked so anything accepted before stays accepted.
    return (
        lowered.endswith(image_extensions)
        or without_query.endswith(image_extensions)
    )
def extract_filename_from_url(url):
    """Return the final path component of *url*.

    Only the path portion of the parsed URL is considered, so a query
    string or fragment never ends up in the result.

    Args:
        url: URL string to parse.

    Returns:
        str: Text after the last path separator in the URL's path; an empty
        string when the path is empty or ends with a separator.
    """
    url_path = urlparse(url).path
    _directory, filename = os.path.split(url_path)
    return filename
def clean_ansi(text):
    """Strip ANSI escape sequences from *text*.

    A sequence is ESC followed by ``[``, any run of digits and semicolons,
    and one terminating ASCII letter (for example the colour code
    ``ESC[31m``).

    Args:
        text: String that may contain terminal escape codes.

    Returns:
        str: *text* with every such sequence removed.
    """
    ansi_sequence = re.compile(r'\x1b\[[0-9;]*[a-zA-Z]')
    return ansi_sequence.sub('', text)
def generate_random_string(length=12):
    """Generate a random alphanumeric string of the given length.

    Each character is drawn independently with ``random.choice`` from the
    ASCII letters (upper and lower case) plus the digits 0-9.

    NOTE: the ``random`` module is not cryptographically secure, so the
    result should not be used as a secret or token.

    Args:
        length: Number of characters to produce (default 12).

    Returns:
        str: The generated string; empty when *length* is 0.
    """
    alphabet = string.ascii_letters + string.digits
    picked = [random.choice(alphabet) for _ in range(length)]
    return ''.join(picked)
def extract_urls(text):
    """Return every http/https URL found in *text*, in order of appearance.

    A match starts at ``http://`` or ``https://`` and continues over ASCII
    letters, digits, the character range ``$`` through ``_`` (which covers
    ``/ : ? = - < > [ ]`` among others), ``@ . & + ! * \\ ( ) ,`` and
    percent-escapes. Consequences worth knowing:

    * trailing punctuation such as ``.``, ``,`` or ``)`` directly after a
      URL is captured as part of it;
    * characters outside that set, e.g. ``~``, ``#``, whitespace or quotes,
      end the match.

    Args:
        text: String to scan.

    Returns:
        list[str]: The matched URLs; empty when none are found.
    """
    url_regex = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    )
    return [found.group(0) for found in url_regex.finditer(text)]
def replace_latex_format(s):
    r"""Rewrite LaTeX ``\(...\)`` and ``\[...\]`` delimiters as ``$$...$$``.

    Both delimiter styles are converted to the same ``$$`` form. Matching is
    non-greedy and spans newlines, so multi-line formulas are handled.

    Args:
        s: Text that may contain LaTeX math delimiters.

    Returns:
        str: The text with both delimiter styles replaced.
    """
    delimiter_patterns = (
        r'\\\((.*?)\\\)',  # \( ... \)
        r'\\\[(.*?)\\\]',  # \[ ... \]
    )
    # Applied one after the other, parentheses form first.
    for pattern in delimiter_patterns:
        s = re.sub(pattern, r'$$\1$$', s, flags=re.DOTALL)
    return s
def extract_and_replace_url(text):
    """Format output text as a fenced code block followed by its links.

    Every http/https URL found in *text* is removed from the text and gets
    one entry after the code block: a non-image URL becomes a Markdown link
    labelled with the URL's file name. The marker phrases
    "Generated an image: " and "Generated files on server: " and any ANSI
    escape sequences are stripped from the text as well.

    Each distinct URL is listed once (in first-seen order), and URLs are
    removed longest-first so that a URL which is a prefix of, or contained
    in, another URL does not leave a mangled remainder in the text.

    Args:
        text: Raw output text, possibly containing URLs and ANSI codes.

    Returns:
        str: The cleaned text wrapped in a ```python fence, a newline, then
        the per-URL entries joined by newlines.
    """
    # Same pattern as extract_urls(); URLs are collected before the marker
    # phrases are removed, as before.
    url_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    # dict.fromkeys drops repeats while keeping first-seen order.
    unique_urls = list(dict.fromkeys(re.findall(url_pattern, text)))

    text = text.replace("Generated an image: ", "")
    text = text.replace("Generated files on server: ", "")

    new_urls = []
    for extracted_url in unique_urls:
        if is_image_link(extracted_url):
            # NOTE(review): image links yield an empty entry. This looks
            # like image markup may have been lost -- confirm the intended
            # output before relying on it.
            new_url = ''
        else:
            filename = extract_filename_from_url(extracted_url)
            new_url = f'[{filename}]({extracted_url})'
        new_urls.append(new_url)

    # Longest first: removing a short URL before a longer one that contains
    # it would leave the longer URL's tail behind in the text.
    for extracted_url in sorted(unique_urls, key=len, reverse=True):
        text = text.replace(extracted_url, "")

    # Strip ANSI escape sequences (ESC [ ... letter) from the remaining text.
    text = re.sub(r'\x1b\[[0-9;]*[a-zA-Z]', '', text)
    return f'```python\n{text}```' + "\n" + "\n".join(new_urls)
def contains_chinese(input_context: str) -> bool:
    """Report whether the string holds a character in U+4E00 through U+9FFF.

    Args:
        input_context: Text to examine.

    Returns:
        bool: True as soon as one character falls inside that code-point
        range (inclusive); False for an empty string or when none does.
    """
    return any('\u4e00' <= char <= '\u9fff' for char in input_context)
|