Spaces:
Paused
Paused
| import requests | |
| import re | |
| from others import * | |
# Default HTTP request headers: a desktop Chrome User-Agent string that
# get_info_paipancon passes along with every requests.get call.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
}
def _extract_between(html, start_marker, end_marker, include_marker=False):
    """Return the text between ``start_marker`` and the next ``end_marker``.

    Returns an empty string when either marker is missing, instead of slicing
    with the ``-1`` that ``str.find`` reports for "not found".

    Args:
        html: The text to search.
        start_marker: Literal text that precedes the wanted value.
        end_marker: Literal text that follows the wanted value.
        include_marker: When true, the result starts with ``start_marker``
            itself (used for URLs, where the marker is the URL prefix).
    """
    start = html.find(start_marker)
    if start == -1:
        return ''
    if not include_marker:
        start += len(start_marker)
    end = html.find(end_marker, start)
    if end == -1:
        return ''
    return html[start:end]


def _space_out(text):
    """Insert spaces at lower/upper case boundaries and around letter-letter hyphens."""
    text = re.sub(r'([a-z])([A-Z])', r'\1 \2', text)
    return re.sub(r'([A-Za-z])-([A-Za-z])', r'\1 - \2', text)


def get_info_paipancon(link, timeout=30):
    """Fetch a page and scrape title, series, actress, number and video links.

    The page is parsed with plain substring searches, so every field depends
    on the exact markers used below being present in the HTML.

    Args:
        link: URL of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so an
            unresponsive server cannot block the caller forever.

    Returns:
        A tuple ``(title, series, actress, digit, server_link, hq_link)``.
        ``digit`` is ``None`` when the title contains no matching number;
        every other field is an empty string when its marker is not found.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out. HTTP error statuses are not checked.
    """
    response = requests.get(link, headers=headers, timeout=timeout)
    html = response.text

    # Title: text of the <a> inside the centred, x-large styled element.
    # (Translation of Japanese titles to English is currently not applied.)
    title = _extract_between(
        html, '"text-align:center;font-size:x-large;"><a>', '</a>'
    )
    title = _space_out(title)
    # Upper-case only the first character of each word; unlike str.title(),
    # the remaining characters keep their original case.
    title = ' '.join(word[0].upper() + word[1:] for word in title.split())

    # Number: a run of digits/underscores right after a hyphen, or at the very
    # start of the title.
    digit = None
    match = re.search(r'(?<=-)\d[\d_]*|^\d[\d_]*', title)
    if match:
        digit = match.group()
        # A four-character number starting with '0' loses that single zero.
        if len(digit) == 4 and digit[0] == '0':
            digit = digit[1:]

    # Series: text following the '>系列:' label up to the closing </a>.
    series = _extract_between(html, '>系列:', '</a>')
    series = _space_out(series).title()

    # Actress: text following the '女優:' label up to the closing </a>.
    actress = _extract_between(html, '女優:', '</a>')
    if actress:
        # convert_japanese_to_romaji is not defined in this block; presumably
        # it comes from `from others import *`. It is skipped for an empty
        # field so the helper only ever sees real page text.
        actress = convert_japanese_to_romaji(actress)
    actress = _space_out(actress).title().replace(', ', ' & ')

    # Video URLs: each starts at its host prefix and ends before "' type=".
    server_link = _extract_between(
        html, 'https://op1', "' type=", include_marker=True
    )
    hq_link = _extract_between(
        html, 'https://fl', "' type=", include_marker=True
    )

    return title, series, actress, digit, server_link, hq_link
def paipancon():
    """Placeholder entry point; it performs no work and always returns None."""
    return None