Ruhivig65 committed on
Commit
43ec02a
·
verified ·
1 Parent(s): 237370b

Upload 4 files

Browse files
app/api/__init__.py CHANGED
@@ -0,0 +1 @@
 
 
1
+ # API Routes Package
app/api/routes_download.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ============================================
3
+ Download Routes
4
+ - Download complete novel as .txt file
5
+ - Get chapter info/counts
6
+ ============================================
7
+ """
8
+
9
+ import io
10
+ import logging
11
+ from fastapi import APIRouter, Depends, HTTPException
12
+ from fastapi.responses import StreamingResponse
13
+ from sqlalchemy.ext.asyncio import AsyncSession
14
+
15
+ from app.database.connection import get_db_session
16
+ from app.database.crud import (
17
+ get_novel_by_id,
18
+ get_chapters_for_novel,
19
+ get_chapter_count,
20
+ get_total_word_count,
21
+ )
22
+
23
+ logger = logging.getLogger(__name__)
24
+ router = APIRouter(prefix="/api/download", tags=["Download"])
25
+
26
+
27
@router.get("/{novel_id}/info")
async def get_download_info(
    novel_id: int,
    db: AsyncSession = Depends(get_db_session),
):
    """
    Summarize the downloadable content of a novel.

    Reports the novel's title and status together with its saved chapter
    count, total word count and a page estimate (250 words per page).

    Raises:
        HTTPException: 404 when no novel with ``novel_id`` exists.
    """
    novel = await get_novel_by_id(db, novel_id)
    if not novel:
        raise HTTPException(status_code=404, detail="Novel not found")

    chapters_saved = await get_chapter_count(db, novel_id)
    words_saved = await get_total_word_count(db, novel_id)

    # A missing or zero word count maps to zero pages.
    pages = 0
    if words_saved:
        pages = words_saved // 250

    info = {
        "novel_id": novel_id,
        "title": novel.title,
        "status": novel.status.value,
        "chapter_count": chapters_saved,
        "word_count": words_saved,
        "estimated_pages": pages,
        "downloadable": chapters_saved > 0,
    }
    return info
52
+
53
+
54
def _render_novel_text(novel, chapters, total_words):
    """Return the plain-text rendering of a novel: header, chapters, footer."""
    heavy_rule = "=" * 60
    light_rule = "-" * 50

    lines = [
        heavy_rule,
        f" {novel.title}",
        heavy_rule,
        f" Source: {novel.url}",
        f" Chapters: {len(chapters)}",
        f" Total Words: {total_words:,}",
        " Generated by Novel Scraper Pro",
        heavy_rule,
        "",
        "",
    ]

    for chapter in chapters:
        lines.extend(
            [
                light_rule,
                f" Chapter {chapter.chapter_number}: {chapter.title}",
                light_rule,
                "",
                # A chapter saved without content must not break str.join.
                chapter.content or "",
                "",
                "",
            ]
        )

    lines.extend([heavy_rule, " END OF NOVEL", heavy_rule])
    return "\n".join(lines)


def _attachment_disposition(title, novel_id):
    """Build a Content-Disposition value that is safe for any novel title."""
    from urllib.parse import quote

    # Keep only letters/digits and a few harmless separators, cap the length.
    cleaned = "".join(
        c for c in (title or "") if c.isalnum() or c in (" ", "-", "_")
    ).strip()
    cleaned = cleaned[:100].strip() or f"novel_{novel_id}"

    # str.isalnum() accepts non-ASCII letters (e.g. CJK titles). Such
    # characters cannot be sent raw in a header, so the plain `filename`
    # parameter carries an ASCII-only fallback ...
    ascii_stem = cleaned.encode("ascii", "ignore").decode("ascii").strip()
    if not ascii_stem:
        ascii_stem = f"novel_{novel_id}"

    disposition = f'attachment; filename="{ascii_stem}.txt"'
    if ascii_stem != cleaned:
        # ... and the real name is sent percent-encoded via RFC 5987.
        disposition += f"; filename*=UTF-8''{quote(cleaned + '.txt', safe='')}"
    return disposition


@router.get("/{novel_id}/txt")
async def download_novel_txt(
    novel_id: int,
    db: AsyncSession = Depends(get_db_session),
):
    """
    Download the complete novel as a .txt file.

    Format:
    ========================================
    NOVEL TITLE
    ========================================

    --- Chapter 1: Title ---

    Chapter content...

    --- Chapter 2: Title ---

    Chapter content...

    Returns:
        A ``text/plain`` UTF-8 attachment containing every saved chapter.

    Raises:
        HTTPException: 404 when the novel does not exist or has no chapters.
    """
    novel = await get_novel_by_id(db, novel_id)
    if not novel:
        raise HTTPException(status_code=404, detail="Novel not found")

    chapters = await get_chapters_for_novel(db, novel_id)
    if not chapters:
        raise HTTPException(
            status_code=404,
            detail="No chapters found for this novel. Scrape some chapters first!",
        )

    # Treat a missing per-chapter word count as zero instead of crashing sum().
    total_words = sum(ch.word_count or 0 for ch in chapters)

    file_bytes = _render_novel_text(novel, chapters, total_words).encode("utf-8")

    logger.info(
        f"Download: Novel {novel_id} '{novel.title}' - "
        f"{len(chapters)} chapters, {total_words:,} words"
    )

    return StreamingResponse(
        io.BytesIO(file_bytes),
        media_type="text/plain; charset=utf-8",
        headers={
            "Content-Disposition": _attachment_disposition(novel.title, novel_id),
            # Length of the encoded bytes, not of the str.
            "Content-Length": str(len(file_bytes)),
        },
    )
147
+
148
+
149
@router.get("/{novel_id}/chapters")
async def list_chapters(
    novel_id: int,
    db: AsyncSession = Depends(get_db_session),
):
    """
    Return metadata for every saved chapter of a novel, without the text.

    Intended for the UI to display scraping progress.

    Raises:
        HTTPException: 404 when no novel with ``novel_id`` exists.
    """
    novel = await get_novel_by_id(db, novel_id)
    if not novel:
        raise HTTPException(status_code=404, detail="Novel not found")

    chapters = await get_chapters_for_novel(db, novel_id)

    summaries = []
    for ch in chapters:
        # scraped_at may be unset; serialize it as ISO-8601 only when present.
        scraped_at = ch.scraped_at.isoformat() if ch.scraped_at else None
        summaries.append(
            {
                "number": ch.chapter_number,
                "title": ch.title,
                "word_count": ch.word_count,
                "url": ch.url,
                "scraped_at": scraped_at,
            }
        )

    return {
        "novel_id": novel_id,
        "title": novel.title,
        "total_chapters": len(chapters),
        "chapters": summaries,
    }
app/api/routes_intervention.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ============================================
3
+ Manual Intervention Routes
4
+ - View captcha screenshots
5
+ - Click on captcha remotely
6
+ - Type text into fields remotely
7
+ - Mark intervention as resolved
8
+ ============================================
9
+ """
10
+
11
+ import os
12
+ import logging
13
+ from typing import Optional
14
+ from fastapi import APIRouter, HTTPException, status
15
+ from fastapi.responses import FileResponse
16
+ from pydantic import BaseModel, Field
17
+
18
+ from app.scraper.browser_manager import browser_manager
19
+ from app.scraper.captcha_detector import captcha_detector
20
+ from app.scraper.scraper_engine import scraper_status, update_status
21
+ from app.config import settings
22
+
23
+ logger = logging.getLogger(__name__)
24
+ router = APIRouter(prefix="/api/intervention", tags=["Intervention"])
25
+
26
+
27
+ # ============================================
28
+ # Request Models
29
+ # ============================================
30
class ClickRequest(BaseModel):
    """Body of a remote-click call: the target novel and where to click."""

    # Novel whose browser page receives the click.
    novel_id: int = Field(..., examples=[1])
    # Click coordinates; each is validated to be non-negative.
    x: int = Field(..., examples=[500], ge=0)
    y: int = Field(..., examples=[300], ge=0)
35
+
36
+
37
class TypeRequest(BaseModel):
    """Body of a remote-type call: which field to fill and with what text."""

    # Novel whose browser page receives the input.
    novel_id: int = Field(..., examples=[1])
    # Selector locating the input element on the page.
    selector: str = Field(..., examples=["input#captcha-input"])
    # Text to type into that element.
    text: str = Field(..., examples=["abc123"])
42
+
43
+
44
class ResolveRequest(BaseModel):
    """Body of a call that marks a novel's intervention as resolved."""

    # Novel whose pending intervention is being closed.
    novel_id: int = Field(..., examples=[1])
47
+
48
+
49
class RefreshScreenshotRequest(BaseModel):
    """Body of a call asking for a new screenshot of a novel's page."""

    # Novel whose current browser page should be captured.
    novel_id: int = Field(..., examples=[1])
52
+
53
+
54
+ # ============================================
55
+ # Routes
56
+ # ============================================
57
@router.get("/active")
async def get_active_interventions():
    """
    List every novel that is currently waiting for manual intervention.

    The frontend polls this endpoint to show captcha alerts. Each entry is
    normalized so missing fields fall back to fixed defaults.
    """
    pending = captcha_detector.get_all_interventions()

    summaries = {
        nid: {
            "novel_id": nid,
            "screenshot": details.get("screenshot", ""),
            "reason": details.get("reason", "Unknown"),
            "page_url": details.get("page_url", ""),
            "timestamp": details.get("timestamp", 0),
            "waiting": details.get("waiting", True),
        }
        for nid, details in pending.items()
    }

    return {"count": len(summaries), "interventions": summaries}
80
+
81
+
82
@router.get("/screenshot/{filename}")
async def get_screenshot(filename: str):
    """
    Serve a captcha screenshot image.

    The frontend displays this so the user can see the captcha.

    Args:
        filename: Name of a file inside ``settings.SCREENSHOTS_DIR``.

    Raises:
        HTTPException: 404 when the name does not refer to an existing
            regular file in the screenshots directory.
    """
    # Security: drop any directory components to prevent traversal.
    safe_filename = os.path.basename(filename)
    filepath = os.path.join(settings.SCREENSHOTS_DIR, safe_filename)

    # isfile (not exists): basename("..") is still "..", which names an
    # existing *directory*; serving it would fail with a 500 instead of a
    # clean 404.
    if not os.path.isfile(filepath):
        raise HTTPException(
            status_code=404,
            detail=f"Screenshot not found: {safe_filename}",
        )

    return FileResponse(
        filepath,
        media_type="image/png",
        filename=safe_filename,
    )
103
+
104
+
105
@router.post("/click")
async def remote_click(request: ClickRequest):
    """
    Perform a click at given coordinates on a novel's browser page.

    Flow:
    1. The user views the screenshot in the UI
    2. The user clicks on the captcha in that screenshot
    3. The frontend posts the click coordinates to this endpoint
    4. The backend replays the click on the headless browser

    A follow-up screenshot is taken so the caller can see the result.

    Raises:
        HTTPException: 404 when the novel has no open page, 500 when the
            click or the follow-up screenshot raises.
    """
    import time

    novel_id = request.novel_id
    x, y = request.x, request.y

    # The novel must have a live page to click on.
    page = browser_manager.get_page(novel_id)
    page_unavailable = page is None or page.is_closed()
    if page_unavailable:
        raise HTTPException(
            status_code=404,
            detail=f"No active browser page for Novel {novel_id}",
        )

    try:
        await browser_manager.click_at_coordinates(novel_id, x, y)
        logger.info(f"Remote click at ({x}, {y}) for Novel {novel_id}")

        # Capture the page again so the UI can show the effect of the click.
        new_filename = f"novel_{novel_id}_after_click_{int(time.time())}.png"
        captured = await browser_manager.take_screenshot(novel_id, new_filename)

        outcome = {"message": f"Clicked at ({x}, {y})"}
        outcome["new_screenshot"] = new_filename if captured else None
        return outcome

    except Exception as e:
        logger.error(f"Remote click failed for Novel {novel_id}: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Click failed: {str(e)}",
        )
147
+
148
+
149
@router.post("/type")
async def remote_type(request: TypeRequest):
    """
    Type text into an element on a novel's browser page.

    Handy for text-based captchas.

    Raises:
        HTTPException: 404 when the novel has no open page, 500 when the
            typing operation raises.
    """
    novel_id = request.novel_id
    selector = request.selector

    page = browser_manager.get_page(novel_id)
    page_unavailable = page is None or page.is_closed()
    if page_unavailable:
        raise HTTPException(
            status_code=404,
            detail=f"No active browser page for Novel {novel_id}",
        )

    try:
        await browser_manager.type_text(novel_id, selector, request.text)

        # The log line records the selector only, not the typed text.
        logger.info(f"Remote type into '{selector}' for Novel {novel_id}")

        confirmation = f"Typed '{request.text}' into '{selector}'"
        return {"message": confirmation}

    except Exception as e:
        logger.error(f"Remote type failed for Novel {novel_id}: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Type failed: {str(e)}",
        )
181
+
182
+
183
@router.post("/resolve")
async def resolve_intervention(request: ResolveRequest):
    """
    Flag a novel's captcha intervention as resolved.

    Intended to be called once:
    1. The captcha has been clicked through /click
    2. It looks solved
    3. The scraper should carry on

    Raises:
        HTTPException: 404 when the novel has no active intervention.
    """
    novel_id = request.novel_id

    if not captcha_detector.get_intervention_status(novel_id):
        raise HTTPException(
            status_code=404,
            detail=f"No active intervention for Novel {novel_id}",
        )

    captcha_detector.mark_intervention_complete(novel_id)

    # Publish the new phase through the scraper status helper.
    update_status(
        novel_id,
        phase="resuming",
        message="Intervention resolved! Resuming scraping...",
    )

    logger.info(f"Intervention resolved for Novel {novel_id} ✅")

    reply = {
        "message": f"Intervention for Novel {novel_id} marked as resolved",
        "novel_id": novel_id,
    }
    return reply
216
+
217
+
218
@router.post("/refresh-screenshot")
async def refresh_screenshot(request: RefreshScreenshotRequest):
    """
    Capture a new screenshot of the novel's current page.

    Useful for checking the page state after a remote click.

    Raises:
        HTTPException: 404 when the novel has no open page, 500 when the
            screenshot cannot be taken or the page cannot be queried.
    """
    import time

    novel_id = request.novel_id

    page = browser_manager.get_page(novel_id)
    page_unavailable = page is None or page.is_closed()
    if page_unavailable:
        raise HTTPException(
            status_code=404,
            detail=f"No active browser page for Novel {novel_id}",
        )

    filename = f"novel_{novel_id}_refresh_{int(time.time())}.png"

    try:
        screenshot_path = await browser_manager.take_screenshot(novel_id, filename)

        if screenshot_path is None:
            raise HTTPException(
                status_code=500,
                detail="Failed to take screenshot",
            )

        current_url = page.url
        current_title = await page.title()

    except HTTPException:
        # Our own 500 above must pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Refresh screenshot failed for Novel {novel_id}: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Screenshot failed: {str(e)}",
        )

    return {
        "screenshot": filename,
        "page_url": current_url,
        "page_title": current_title,
    }
app/api/routes_scraper.py ADDED
@@ -0,0 +1,341 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ============================================
3
+ Scraper API Routes
4
+ - Add novels
5
+ - Start/Stop scraping
6
+ - Get live status
7
+ - Delete novels
8
+ ============================================
9
+ """
10
+
11
+ import logging
12
+ from typing import Optional, List
13
+ from fastapi import APIRouter, Depends, HTTPException, status
14
+ from pydantic import BaseModel, Field, HttpUrl
15
+ from sqlalchemy.ext.asyncio import AsyncSession
16
+
17
+ from app.database.connection import get_db_session
18
+ from app.database.crud import (
19
+ create_novel,
20
+ get_all_novels,
21
+ get_novel_by_id,
22
+ delete_novel,
23
+ update_novel_status,
24
+ )
25
+ from app.database.models import NovelStatus
26
+ from app.scraper.scraper_engine import (
27
+ start_scraping_novel,
28
+ stop_scraping_novel,
29
+ stop_all_scraping,
30
+ scraper_status,
31
+ get_active_task_ids,
32
+ )
33
+ from app.scraper.browser_manager import browser_manager
34
+
35
+ logger = logging.getLogger(__name__)
36
+ router = APIRouter(prefix="/api", tags=["Scraper"])
37
+
38
+
39
+ # ============================================
40
+ # Request/Response Models (Pydantic)
41
+ # ============================================
42
class NovelAddRequest(BaseModel):
    """Payload describing one novel to register for scraping."""

    # Display title, 1 to 500 characters.
    title: str = Field(
        ...,
        examples=["My Novel"],
        min_length=1,
        max_length=500,
    )
    # Starting URL; only a minimum length of 10 characters is enforced.
    url: str = Field(
        ...,
        examples=["https://example.com/novel/chapter-1"],
        min_length=10,
    )
    # Optional credentials stored with the novel.
    login_email: Optional[str] = Field(None, examples=["user@email.com"])
    login_password: Optional[str] = Field(None, examples=["password123"])
    # Optional selector overrides for the next-chapter link and chapter body.
    next_button_selector: Optional[str] = Field(
        None, examples=["a.next_page, a[rel='next'], .next-chap"]
    )
    content_selector: Optional[str] = Field(
        None, examples=[".chapter-content, .reading-content, #chapter-content"]
    )
56
+
57
+
58
class NovelResponse(BaseModel):
    """Serialized view of a novel returned by the novel endpoints."""

    # Identity and source.
    id: int
    title: str
    url: str
    # Status as a plain string (the routes pass the enum's ``.value``).
    status: str
    chapters_scraped: int
    # Optional diagnostic / intervention details.
    last_error: Optional[str] = None
    needs_intervention: bool = False
    screenshot_path: Optional[str] = None
    current_url: Optional[str] = None

    class Config:
        # Pydantic option enabling validation from attribute-bearing objects.
        from_attributes = True
72
+
73
+
74
class BatchAddRequest(BaseModel):
    """Payload for registering several novels in a single call."""

    # Between 1 and 15 novels per batch.
    novels: List[NovelAddRequest] = Field(..., max_length=15, min_length=1)
    # Credentials applied to any novel that does not supply its own.
    shared_email: Optional[str] = None
    shared_password: Optional[str] = None
79
+
80
+
81
class StartScrapeRequest(BaseModel):
    """Payload selecting which novels a start-scraping call applies to."""

    # Leave unset to let the endpoint pick the novels itself.
    novel_ids: Optional[List[int]] = Field(
        default=None,
        description="Specific novel IDs to scrape. If None, scrape all queued.",
    )
87
+
88
+
89
class StatusResponse(BaseModel):
    """Snapshot of scraping activity returned by the status endpoint."""

    # Browser usage versus the configured limit.
    active_browsers: int
    max_browsers: int
    browser_initialized: bool
    # Per-novel status mapping and the IDs of running tasks.
    novels: dict
    active_task_ids: list
96
+
97
+
98
+ # ============================================
99
+ # Routes
100
+ # ============================================
101
@router.post("/novels", response_model=NovelResponse, status_code=status.HTTP_201_CREATED)
async def add_novel(
    request: NovelAddRequest,
    db: AsyncSession = Depends(get_db_session),
):
    """
    Store one novel in the database.

    The novel stays in 'queued' status until scraping is started.

    Raises:
        HTTPException: 500 when creating, committing or serializing the
            novel raises.
    """
    try:
        novel = await create_novel(
            db,
            title=request.title,
            url=request.url,
            login_email=request.login_email,
            login_password=request.login_password,
            next_button_selector=request.next_button_selector,
            content_selector=request.content_selector,
        )
        await db.commit()

        logger.info(f"Novel added: {novel.title} (ID: {novel.id})")

        fields = {
            "id": novel.id,
            "title": novel.title,
            "url": novel.url,
            "status": novel.status.value,
            "chapters_scraped": novel.chapters_scraped,
            "last_error": novel.last_error,
            "needs_intervention": novel.needs_intervention,
            "screenshot_path": novel.screenshot_path,
            "current_url": novel.current_url,
        }
        return NovelResponse(**fields)

    except Exception as e:
        logger.error(f"Error adding novel: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to add novel: {str(e)}",
        )
142
+
143
+
144
@router.post("/novels/batch", status_code=status.HTTP_201_CREATED)
async def add_novels_batch(
    request: BatchAddRequest,
    db: AsyncSession = Depends(get_db_session),
):
    """
    Store several novels in one request.

    A novel's own credentials take precedence; the shared ones are used
    only when a novel does not provide them. A failure on one novel is
    recorded in ``errors`` and does not stop the rest of the batch.
    """
    succeeded = []
    failed = []

    for entry in request.novels:
        try:
            novel = await create_novel(
                db,
                title=entry.title,
                url=entry.url,
                login_email=entry.login_email or request.shared_email,
                login_password=entry.login_password or request.shared_password,
                next_button_selector=entry.next_button_selector,
                content_selector=entry.content_selector,
            )
            succeeded.append(
                {"id": novel.id, "title": novel.title, "status": "queued"}
            )
        except Exception as e:
            failed.append({"title": entry.title, "error": str(e)})

    # Single commit for the whole batch.
    await db.commit()

    return {
        "added": succeeded,
        "errors": failed,
        "total_added": len(succeeded),
        "total_errors": len(failed),
    }
189
+
190
+
191
@router.get("/novels", response_model=List[NovelResponse])
async def list_novels(db: AsyncSession = Depends(get_db_session)):
    """Return every stored novel along with its current status."""
    serialized = []
    for novel in await get_all_novels(db):
        serialized.append(
            NovelResponse(
                id=novel.id,
                title=novel.title,
                url=novel.url,
                status=novel.status.value,
                chapters_scraped=novel.chapters_scraped,
                last_error=novel.last_error,
                needs_intervention=novel.needs_intervention,
                screenshot_path=novel.screenshot_path,
                current_url=novel.current_url,
            )
        )
    return serialized
209
+
210
+
211
@router.get("/novels/{novel_id}", response_model=NovelResponse)
async def get_novel(
    novel_id: int,
    db: AsyncSession = Depends(get_db_session),
):
    """
    Fetch one novel by its ID.

    Raises:
        HTTPException: 404 when no novel with ``novel_id`` exists.
    """
    novel = await get_novel_by_id(db, novel_id)
    if not novel:
        raise HTTPException(status_code=404, detail="Novel not found")

    fields = {
        "id": novel.id,
        "title": novel.title,
        "url": novel.url,
        "status": novel.status.value,
        "chapters_scraped": novel.chapters_scraped,
        "last_error": novel.last_error,
        "needs_intervention": novel.needs_intervention,
        "screenshot_path": novel.screenshot_path,
        "current_url": novel.current_url,
    }
    return NovelResponse(**fields)
232
+
233
+
234
@router.delete("/novels/{novel_id}")
async def remove_novel(
    novel_id: int,
    db: AsyncSession = Depends(get_db_session),
):
    """
    Delete a novel together with all of its chapters.

    Any active scrape for the novel is stopped before the delete.

    Raises:
        HTTPException: 404 when there was no such novel to delete.
    """
    # Stop the scraper first so it is not running against a deleted novel.
    await stop_scraping_novel(novel_id)

    was_deleted = await delete_novel(db, novel_id)
    if not was_deleted:
        raise HTTPException(status_code=404, detail="Novel not found")

    await db.commit()

    confirmation = f"Novel {novel_id} deleted successfully"
    return {"message": confirmation}
249
+
250
+
251
@router.post("/scrape/start")
async def start_scraping(
    request: StartScrapeRequest = StartScrapeRequest(),
    db: AsyncSession = Depends(get_db_session),
):
    """
    Start scraping novels.

    If ``novel_ids`` is provided, only those novels are scraped. Otherwise
    every novel in QUEUED or PAUSED_ERROR status is scraped.

    Returns:
        A dict with a summary message, the novels whose scrape was started,
        and the configured concurrent-browser limit.

    Raises:
        HTTPException: 503 when the browser is not initialized yet, 404 when
            no novel matches the selection.
    """
    if not browser_manager.is_initialized:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Browser not initialized yet. Please wait and try again.",
        )

    novels = await get_all_novels(db)

    if request.novel_ids:
        # Build the lookup set once instead of scanning the list per novel.
        wanted_ids = set(request.novel_ids)
        novels_to_scrape = [n for n in novels if n.id in wanted_ids]
    else:
        resumable = (NovelStatus.QUEUED, NovelStatus.PAUSED_ERROR)
        novels_to_scrape = [n for n in novels if n.status in resumable]

    if not novels_to_scrape:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No novels found to scrape.",
        )

    started = []
    for novel in novels_to_scrape:
        novel_data = {
            "url": novel.url,
            "title": novel.title,
            "login_email": novel.login_email,
            "login_password": novel.login_password,
            "next_button_selector": novel.next_button_selector,
            "content_selector": novel.content_selector,
        }

        # Only novels whose scrape actually started are reported back.
        if await start_scraping_novel(novel.id, novel_data):
            started.append({"id": novel.id, "title": novel.title})

    return {
        "message": f"Started scraping {len(started)} novel(s)",
        "started": started,
        # Report the configured limit, matching what /status returns, instead
        # of reading the semaphore's private `_value`.
        # NOTE(review): `_value` presumably holds the *remaining* permits,
        # not the maximum - confirm against browser_manager.
        # `settings` is imported at module level at the bottom of this file.
        "max_concurrent": settings.MAX_CONCURRENT_BROWSERS,
    }
303
+
304
+
305
@router.post("/scrape/stop/{novel_id}")
async def stop_one_scraping(novel_id: int):
    """
    Stop the scrape of a single novel.

    Raises:
        HTTPException: 404 when that novel is not being scraped.
    """
    was_running = await stop_scraping_novel(novel_id)
    if not was_running:
        raise HTTPException(
            status_code=404,
            detail=f"Novel {novel_id} is not actively scraping",
        )
    return {"message": f"Novel {novel_id} scraping stopped"}
316
+
317
+
318
@router.post("/scrape/stop-all")
async def stop_all():
    """Stop every scraping task that is currently active."""
    await stop_all_scraping()
    acknowledgement = {"message": "All scraping tasks stopped"}
    return acknowledgement
323
+
324
+
325
@router.get("/status", response_model=StatusResponse)
async def get_live_status():
    """
    Report the real-time state of all scraping activity.

    The frontend polls this endpoint every few seconds.
    """
    browsers_in_use = browser_manager.active_count
    browser_limit = settings.MAX_CONCURRENT_BROWSERS
    browser_ready = browser_manager.is_initialized
    running_ids = get_active_task_ids()

    snapshot = StatusResponse(
        active_browsers=browsers_in_use,
        max_browsers=browser_limit,
        browser_initialized=browser_ready,
        novels=scraper_status,
        active_task_ids=running_ids,
    )
    return snapshot
338
+
339
+
340
+ # Need to import settings
341
+ from app.config import settings