Spaces:

GilbertClaus
/

VideoCutter

Paused

File size: 3,026 Bytes

import requests
import re
from others import *

# HTTP headers passed to ``requests.get``; the User-Agent identifies the
# client as a desktop Chrome browser on Windows.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.3'
    ),
}


# Seconds to wait for the server before giving up. Without a timeout,
# ``requests.get`` can block indefinitely on an unresponsive host.
_REQUEST_TIMEOUT = 30


def _extract_between(html, start_marker, end_marker, keep_marker=False):
    """Return the text between *start_marker* and the next *end_marker*.

    Args:
        html: The text to search.
        start_marker: Literal text that precedes the wanted span.
        end_marker: Literal text that terminates the wanted span; searched
            for only after the start position.
        keep_marker: When true, the returned text begins with
            *start_marker* itself (used for URLs, where the marker is the
            URL prefix) instead of starting right after it.

    Returns:
        The extracted substring, or ``''`` when either marker is missing.
        ``str.find`` reports a miss as -1; using that value as a slice
        bound would silently return unrelated page content.
    """
    marker_pos = html.find(start_marker)
    if marker_pos == -1:
        return ''
    content_start = marker_pos if keep_marker else marker_pos + len(start_marker)
    content_end = html.find(end_marker, content_start)
    if content_end == -1:
        return ''
    return html[content_start:content_end]


def _add_spacing(text):
    """Split camelCase words and pad hyphens that sit between two letters."""
    text = re.sub(r'([a-z])([A-Z])', r'\1 \2', text)
    return re.sub(r'([A-Za-z])-([A-Za-z])', r'\1 - \2', text)


def get_info_paipancon(link):
    """Fetch *link* and scrape title, series, actress and video URLs from it.

    Args:
        link: URL of the page to download.

    Returns:
        A tuple ``(title, series, actress, digit, server_link, hq_link)``.
        ``digit`` is the run of digits/underscores found at the start of the
        title or right after a hyphen, or ``None`` when the title has none.
        Every other element is a string, and is ``''`` when the
        corresponding marker is not present in the page.

    Raises:
        requests.exceptions.RequestException: If the download fails,
            including a timeout after ``_REQUEST_TIMEOUT`` seconds.
    """
    response = requests.get(link, headers=headers, timeout=_REQUEST_TIMEOUT)
    html = response.text

    # --- Title -----------------------------------------------------------
    title = _extract_between(
        html, '"text-align:center;font-size:x-large;"><a>', '</a>'
    )
    title = _add_spacing(title)
    # NOTE: translating Japanese titles/series to English is currently
    # disabled; the text is used as found on the page.
    # Upper-case only the first character of each word (unlike str.title(),
    # the rest of the word is left untouched); this also collapses runs of
    # whitespace into single spaces.
    title = ' '.join(word[0].upper() + word[1:] for word in title.split())

    # --- Digits ----------------------------------------------------------
    # Digits/underscores either directly after a hyphen or at the very
    # start of the title.
    match = re.search(r'(?<=-)\d[\d_]*|^\d[\d_]*', title)
    digit = match.group() if match else None
    # A 4-character value with a leading zero loses that single zero.
    if digit and len(digit) == 4 and digit[0] == '0':
        digit = digit[1:]

    # --- Series ----------------------------------------------------------
    series = _add_spacing(_extract_between(html, '>系列:', '</a>')).title()

    # --- Actress ---------------------------------------------------------
    actress = _extract_between(html, '女優:', '</a>')
    if actress:
        # Only romanize when something was actually found, so the external
        # converter is never handed an empty string.
        actress = convert_japanese_to_romaji(actress)
    actress = _add_spacing(actress).title().replace(', ', ' & ')

    # --- Video links -----------------------------------------------------
    # Both URLs are cut from their scheme/host prefix up to the closing
    # quote that precedes the ``type=`` attribute.
    server_link = _extract_between(
        html, 'https://op1', "' type=", keep_marker=True
    )
    hq_link = _extract_between(html, 'https://fl', "' type=", keep_marker=True)

    return title, series, actress, digit, server_link, hq_link

def paipancon():
    """Do nothing and return ``None``."""
    return