Spaces:
Sleeping
Sleeping
import urllib.parse

import requests
from bs4 import BeautifulSoup
# Desktop-Chrome User-Agent sent with every request — presumably so the
# site serves the normal HTML instead of rejecting the default requests
# UA (NOTE(review): confirm before removing).
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}
def _scrape_novel_list_page(page_url):
    """Fetch a novelfull.net listing page and extract its novel entries.

    Parameters
    ----------
    page_url : str
        Absolute URL of a listing page (search results, genre page,
        hot/latest/completed/most-popular listing).

    Returns
    -------
    list[dict]
        One dict per novel with keys ``title``, ``url`` (absolute),
        ``author`` and ``image_url``. Empty when the expected markup
        (``div.col-truyen-main``) is missing.
    """
    # Explicit timeout: without one, requests can block forever on a
    # stalled connection and hang the caller.
    response = requests.get(page_url, headers=HEADERS, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')
    results = []
    main_content = soup.find('div', class_='col-truyen-main')
    if main_content:
        for row in main_content.find_all('div', class_='row'):
            title_element = row.find('h3', class_='truyen-title')
            # Resolve the title anchor once (the original looked it up twice).
            title_anchor = title_element.find('a') if title_element else None
            if not title_anchor:
                continue
            title = title_anchor.text
            url = title_anchor['href']
            author_element = row.find('span', class_='author')
            author = author_element.text.strip() if author_element else 'N/A'
            # Cover image: usually the first <img> in the row. The URL may
            # live in src or in a lazy-load attribute (data-src/data-cover);
            # check in that priority order.
            image_url = None
            img_tag = row.find('img')
            if img_tag:
                for attr in ('src', 'data-src', 'data-cover'):
                    if attr in img_tag.attrs:
                        image_url = img_tag[attr]
                        break
            # Resolve site-relative image paths to absolute URLs.
            if image_url and image_url.startswith('/'):
                image_url = f"https://novelfull.net{image_url}"
            # Fall back to a placeholder when no cover was found.
            if not image_url:
                image_url = "https://placehold.co/200x300?text=No+Image"
            results.append({
                'title': title,
                'url': f"https://novelfull.net{url}",
                'author': author,
                'image_url': image_url
            })
    return results
def search_novel(query, page=1):
    """Search novelfull.net for *query* and return matching novels.

    The query is URL-encoded before being interpolated into the URL so
    spaces and special characters (``&``, ``#``, non-ASCII) do not break
    or truncate the request.
    """
    encoded_query = urllib.parse.quote_plus(query)
    search_url = f"https://novelfull.net/search?keyword={encoded_query}&page={page}"
    return _scrape_novel_list_page(search_url)
def get_latest_release_novels(page=1):
    """Return novels from the 'latest release' listing, page *page*."""
    listing_url = f"https://novelfull.net/latest-release-novel?page={page}"
    return _scrape_novel_list_page(listing_url)
def get_hot_novels(page=1):
    """Return novels from the 'hot novel' listing, page *page*."""
    listing_url = f"https://novelfull.net/hot-novel?page={page}"
    return _scrape_novel_list_page(listing_url)
def get_completed_novels(page=1):
    """Return novels from the 'completed novel' listing, page *page*."""
    listing_url = f"https://novelfull.net/completed-novel?page={page}"
    return _scrape_novel_list_page(listing_url)
def get_most_popular_novels(page=1):
    """Return novels from the 'most popular' listing, page *page*."""
    listing_url = f"https://novelfull.net/most-popular?page={page}"
    return _scrape_novel_list_page(listing_url)
def get_genres():
    """Return the fixed list of genre names this scraper supports.

    The names (and their order) match the genre links exposed by the
    site; pass one of them to ``get_novels_by_genre``.
    """
    genres = [
        "Shounen", "Horror", "Slice of Life", "Harem", "Drama", "Seinen",
        "Comedy", "Tragedy", "Lolicon", "Martial Arts", "Supernatural",
        "Adult", "School Life", "Ecchi", "Josei", "Mystery", "Xuanhuan",
        "Sports", "Shoujo", "Adventure", "Smut", "Romance", "Action",
        "Mecha", "Sci-fi", "Psychological", "Yaoi", "Gender Bender",
        "Xianxia", "Shounen Ai", "Mature", "Wuxia", "Magical Realism",
        "Fantasy", "Historical", "Video Games",
    ]
    return genres
def get_novels_by_genre(genre_name, page=1):
    """Return novels tagged with *genre_name* (e.g. ``'Martial Arts'``).

    Multi-word genre names are hyphenated to build the URL slug, keeping
    the original capitalization (``'Martial Arts'`` -> ``'Martial-Arts'``).
    """
    slug = '-'.join(genre_name.split(' '))
    genre_url = f"https://novelfull.net/genre/{slug}?page={page}"
    return _scrape_novel_list_page(genre_url)
def get_novel_content(url):
    """Scrape a novel's detail page: title, author, cover and chapters.

    Parameters
    ----------
    url : str
        Absolute URL of the novel's page on novelfull.net.

    Returns
    -------
    dict
        Keys ``title``, ``author``, ``image_url`` (each ``'N/A'`` when
        not found) and ``chapters`` — a list of ``{'title', 'url'}``
        dicts with absolute chapter URLs.
    """
    # Explicit timeout: without one, requests can block forever on a
    # stalled connection and hang the caller.
    response = requests.get(url, headers=HEADERS, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')

    title_element = soup.find('h3', class_='title')
    title = title_element.text if title_element else 'N/A'

    author = 'N/A'
    info_div = soup.find('div', class_='info')
    if info_div:
        # The author name is the <a> that follows the 'Author:' heading.
        # bs4's `string=` replaces the deprecated `text=` keyword.
        author_heading = info_div.find('h3', string='Author:')
        author_link = (author_heading.find_next_sibling('a')
                       if author_heading else None)
        if author_link:
            author = author_link.text

    image_url = 'N/A'
    book_div = soup.find('div', class_='book')
    image_tag = book_div.find('img') if book_div else None
    if image_tag and 'src' in image_tag.attrs:
        src = image_tag['src']
        # Resolve site-relative cover paths to absolute URLs.
        image_url = f"https://novelfull.net{src}" if src.startswith('/') else src

    chapters = []
    list_chapter_div = soup.find('div', id='list-chapter')
    if list_chapter_div:
        for chapter_list in list_chapter_div.select('.list-chapter'):
            for chapter_item in chapter_list.find_all('a'):
                chapters.append({
                    # Prefer the anchor's title attribute; fall back to
                    # its visible text.
                    'title': chapter_item.get('title', chapter_item.text.strip()),
                    'url': f"https://novelfull.net{chapter_item['href']}"
                })
    return {
        'title': title,
        'author': author,
        'image_url': image_url,
        'chapters': chapters
    }
def get_chapter_content(url):
    """Scrape a single chapter page and return its text content.

    Parameters
    ----------
    url : str
        Absolute URL of a chapter page on novelfull.net.

    Returns
    -------
    dict
        One key, ``content``: the chapter text with paragraphs joined by
        blank lines, or ``'N/A'`` when nothing could be extracted.
    """
    # Explicit timeout: without one, requests can block forever on a
    # stalled connection and hang the caller.
    response = requests.get(url, headers=HEADERS, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')
    content_element = soup.find('div', id='chapter-content')
    content = ''
    if content_element:
        paragraphs = content_element.find_all('p')
        if paragraphs:
            # Normal case: the chapter body is a sequence of <p> tags.
            content = '\n\n'.join(p.get_text() for p in paragraphs)
        else:
            # Fallback: some chapters hold bare text directly in the div.
            content = content_element.get_text(separator='\n\n')
    return {
        'content': content.strip() if content else 'N/A'
    }