import re

import requests

# NOTE: wildcard import kept as-is; this module relies on it for
# convert_japanese_to_romaji, and other names may be re-exported to callers.
from others import *

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

# Seconds to wait for the server; without a timeout requests.get() may block
# indefinitely on an unresponsive host.
_REQUEST_TIMEOUT = 30

# Literal fragments of the page markup used to locate each field.
_TITLE_MARKER = '"text-align:center;font-size:x-large;">'
_SERIES_MARKER = '>系列:'
_ACTRESS_MARKER = '女優:'
_SERVER_LINK_PREFIX = 'https://op1'
_HQ_LINK_PREFIX = 'https://fl'
_LINK_END_MARKER = "' type="

# NOTE(review): the original code searched for an empty string as the end
# marker of the title/series/actress fields, which always produced an empty
# result. The fields are now cut at the start of the next HTML tag; confirm
# this against the real page markup.
_TEXT_END_MARKER = '<'

_CAMEL_CASE_RE = re.compile(r'([a-z])([A-Z])')
_LETTER_HYPHEN_RE = re.compile(r'([A-Za-z])-([A-Za-z])')
_DIGIT_RE = re.compile(r'(?<=-)\d[\d_]*|^\d[\d_]*')


def _slice_between(html, start_marker, end_marker, keep_start=False):
    """Return the text between two markers, or '' if either is missing.

    The search for ``end_marker`` begins right after ``start_marker``. When
    ``keep_start`` is true the returned text includes ``start_marker`` itself
    (used for links, where the marker is the beginning of the URL).
    """
    marker_pos = html.find(start_marker)
    if marker_pos == -1:
        return ''
    content_pos = marker_pos + len(start_marker)
    end_pos = html.find(end_marker, content_pos)
    if end_pos == -1:
        return ''
    return html[marker_pos if keep_start else content_pos:end_pos]


def _add_spacing(text):
    """Insert spaces at lower/upper-case boundaries and around hyphens between letters."""
    text = _CAMEL_CASE_RE.sub(r'\1 \2', text)
    return _LETTER_HYPHEN_RE.sub(r'\1 - \2', text)


def _parse_paipancon_html(html):
    """Extract (title, series, actress, digit, server_link, hq_link) from page HTML."""
    # NOTE: translating Japanese titles/series to English is disabled. The
    # previous (commented-out) code mapped '一本道' -> '1pondo' and
    # 'ラフォーレガール' -> 'LaForet Girl' and otherwise called
    # translate_japanese_to_english.

    # Title: upper-case only the first character of each word (unlike
    # str.title(), the rest of the word is left untouched); this also
    # collapses runs of whitespace into single spaces.
    title = _add_spacing(_slice_between(html, _TITLE_MARKER, _TEXT_END_MARKER))
    title = ' '.join(word[0].upper() + word[1:] for word in title.split())

    # Number: digits/underscores either following a hyphen or at the very
    # start of the title. None when the title contains no such number.
    digit = None
    digit_match = _DIGIT_RE.search(title)
    if digit_match:
        digit = digit_match.group()
        # A 4-character number with a leading zero loses that single zero.
        if len(digit) == 4 and digit[0] == '0':
            digit = digit[1:]

    series = _add_spacing(_slice_between(html, _SERIES_MARKER, _TEXT_END_MARKER))
    series = series.title()

    # Actress names are romanised first, then spaced and title-cased; multiple
    # names separated by ', ' are joined with ' & '.
    actress = _slice_between(html, _ACTRESS_MARKER, _TEXT_END_MARKER)
    actress = _add_spacing(convert_japanese_to_romaji(actress))
    actress = actress.title().replace(', ', ' & ')

    # Links run from their URL prefix up to the closing "' type=" fragment.
    server_link = _slice_between(
        html, _SERVER_LINK_PREFIX, _LINK_END_MARKER, keep_start=True)
    hq_link = _slice_between(
        html, _HQ_LINK_PREFIX, _LINK_END_MARKER, keep_start=True)

    return title, series, actress, digit, server_link, hq_link


def get_info_paipancon(link):
    """Download ``link`` and scrape the video metadata from the page.

    Args:
        link: URL of the page to fetch.

    Returns:
        A tuple ``(title, series, actress, digit, server_link, hq_link)``.
        Every element is a string, and is ``''`` when the corresponding
        marker is not found in the page, except ``digit`` which is ``None``
        when no number can be extracted from the title.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    response = requests.get(link, headers=headers, timeout=_REQUEST_TIMEOUT)
    return _parse_paipancon_html(response.text)


def paipancon():
    """Placeholder; currently does nothing and returns None."""
    return None