File size: 8,693 Bytes
0da5fea
 
f96d6b9
0da5fea
 
 
 
 
f96d6b9
0da5fea
2082357
f96d6b9
0da5fea
 
 
 
 
 
 
 
 
2374f73
f96d6b9
0da5fea
 
 
 
 
 
 
 
 
 
f96d6b9
 
2082357
 
f96d6b9
 
 
 
 
0da5fea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f96d6b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0da5fea
 
 
 
 
 
 
 
 
 
 
f96d6b9
 
 
0da5fea
 
f96d6b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0da5fea
f96d6b9
 
6a4c5a7
f0f4e78
f96d6b9
0da5fea
f96d6b9
 
 
 
0da5fea
f0f4e78
 
f96d6b9
6a4c5a7
f0f4e78
6a4c5a7
f96d6b9
 
 
 
 
 
 
6a4c5a7
f96d6b9
0da5fea
 
f0f4e78
 
 
f96d6b9
0da5fea
 
f0f4e78
0da5fea
 
 
 
f96d6b9
 
0da5fea
 
f96d6b9
0da5fea
f96d6b9
0da5fea
f96d6b9
0da5fea
 
f0f4e78
 
 
 
0da5fea
f0f4e78
0da5fea
 
 
 
 
 
 
 
 
 
f96d6b9
f0f4e78
f96d6b9
 
0da5fea
 
 
f96d6b9
0da5fea
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
from fastapi import FastAPI, HTTPException, File, UploadFile, Form
from fastapi.staticfiles import StaticFiles
from fastapi.responses import HTMLResponse, StreamingResponse
from pydantic import BaseModel
import os
import requests
import json
import logging
from typing import Optional, Generator
import PyPDF2
import torch  # Moved to top for device check
from transformers import pipeline  # For summarization

# Suppress warnings for cleaner logs
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# OpenRouter API configuration.
# OPENROUTER_API_KEY must be set in the environment; stream_openrouter()
# raises ValueError when it is missing.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"

# Model suitable for explicit content; overridable via the DEFAULT_MODEL env var.
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "deepseek/deepseek-r1-0528:free")  # Updated to your successful model

app = FastAPI()

# Logging setup: DEBUG only when LOG_LEVEL=debug, otherwise INFO.
logging.basicConfig(level=logging.DEBUG if os.getenv('LOG_LEVEL') == 'debug' else logging.INFO)
logger = logging.getLogger(__name__)

# Mount static files (frontend assets served under /static).
app.mount("/static", StaticFiles(directory="static"), name="static")

# Summarization pipeline (loaded once at import time; this download/load can
# take a while on first run). On failure, summarizer stays None and
# summarize_transcript() falls back to returning the full transcript.
try:
    device = 0 if torch.cuda.is_available() else -1
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
    logger.info("BART summarizer loaded successfully")
except Exception as e:
    logger.error(f"Failed to load BART: {e}. Using full transcript.")
    summarizer = None

def extract_text_from_file(file: UploadFile) -> str:
    """Extract text from an uploaded TXT or PDF file.

    Args:
        file: The uploaded file; only ``.txt`` and ``.pdf`` extensions are
            accepted (checked case-insensitively).

    Returns:
        The extracted text. For PDFs, page texts are concatenated with
        newlines between pages.

    Raises:
        HTTPException: 400 for unsupported extensions; 500 if reading or
            parsing the file fails.
    """
    filename = file.filename.lower()
    if not filename.endswith(('.txt', '.pdf')):
        raise HTTPException(status_code=400, detail="Only TXT or PDF files are supported.")

    content = ""
    try:
        if filename.endswith('.txt'):
            content = file.file.read().decode('utf-8')
        elif filename.endswith('.pdf'):
            pdf_reader = PyPDF2.PdfReader(file.file)
            for page in pdf_reader.pages:
                # Bug fix: extract_text() can return None/empty for pages
                # with no extractable text (e.g. scanned images); the old
                # `None + "\n"` raised TypeError and surfaced as a 500.
                content += (page.extract_text() or "") + "\n"
        return content
    except Exception as e:
        logger.error(f"File extraction error: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to read file: {str(e)}")
    finally:
        # Always release the underlying SpooledTemporaryFile.
        file.file.close()

def summarize_transcript(transcript: str) -> str:
    """Condense a transcript with the BART summarizer to cut token usage.

    Returns the input unchanged when the summarizer is unavailable, the
    text is short, or summarization raises.
    """
    # Short inputs (or a missing model) pass through untouched.
    if not summarizer or len(transcript) < 100:
        return transcript
    try:
        parts = []
        # BART tops out near 1024 tokens, so walk the text in 1000-char windows.
        for start in range(0, len(transcript), 1000):
            piece = transcript[start:start + 1000]
            if len(piece) > 50:  # ignore tiny trailing fragments
                result = summarizer(piece, max_length=150, min_length=30, do_sample=False)
                parts.append(result[0]['summary_text'])
        condensed = " ".join(parts)
        logger.info(f"Summarized {len(transcript)} chars to {len(condensed)} chars")
        return condensed
    except Exception as e:
        logger.error(f"Summarization failed: {e}")
        return transcript

def stream_openrouter(messages: list, model: str) -> Generator[str, None, None]:
    """Stream chat-completion content deltas from the OpenRouter API.

    Args:
        messages: OpenAI-style chat messages ({"role": ..., "content": ...}).
        model: OpenRouter model identifier to route the request to.

    Yields:
        str: Incremental content chunks parsed from the SSE stream.

    Raises:
        ValueError: If OPENROUTER_API_KEY is not set (raised on first
            iteration, since this is a generator function).
        HTTPException: With the upstream status code when OpenRouter returns
            a non-200 response, or 500 on any other failure.
    """
    if not OPENROUTER_API_KEY:
        raise ValueError("OPENROUTER_API_KEY not set")
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://huggingface.co/spaces/NoLev/PodcastToNovel",
        "X-Title": "Podcast to Novel Converter"
    }
    payload = {
        "model": model,
        "messages": messages,
        "temperature": 0.7,
        "max_tokens": 8000,
        "stream": True
    }
    try:
        logger.info(f"Streaming OpenRouter with model: {model}")
        with requests.post(OPENROUTER_API_URL, headers=headers, json=payload, stream=True, timeout=120) as response:
            if response.status_code != 200:
                raise HTTPException(status_code=response.status_code, detail=f"OpenRouter error: {response.text}")
            full_content = ""
            for line in response.iter_lines():
                if line:
                    decoded = line.decode('utf-8')
                    if decoded.startswith('data: '):
                        data = decoded[6:]
                        if data == '[DONE]':
                            break
                        try:
                            chunk = json.loads(data)
                            delta = chunk.get("choices", [{}])[0].get("delta", {}).get("content", "")
                            if delta:
                                full_content += delta
                                yield delta  # Yield chunk for streaming
                        except json.JSONDecodeError:
                            # Skip keep-alive/partial lines that aren't valid JSON.
                            continue
            logger.info(f"Streaming complete: {len(full_content)} chars")
    except HTTPException:
        # Bug fix: previously the broad `except Exception` below caught the
        # status-specific HTTPException raised above and re-raised it as a
        # generic 500, losing the upstream status code. Re-raise as-is.
        raise
    except Exception as e:
        logger.error(f"Streaming failed: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to stream: {str(e)}")

def generate_outline_stream(transcript: str, model: str, chapter_count: int, word_count: int, custom_prompt: Optional[str]) -> Generator[str, None, None]:
    """Stream a chapter-by-chapter outline built from the (summarized) transcript.

    The transcript is condensed via summarize_transcript, then streamed through
    OpenRouter. Errors are yielded as plain "Error: ..." text rather than
    raised, so the response stream stays well-formed.
    """
    # Guard clause: nothing to work with.
    if not transcript:
        yield "Error: Transcript is empty"
        return

    condensed = summarize_transcript(transcript)

    # System prompt; the runtime text is kept identical to the original.
    base_prompt = f"""
    You are a creative novelist specializing in dark romance novels featuring open marriages, BDSM, and swinging lifestyles. Analyze the provided podcast transcript summary for themes, dialogues, character dynamics, and plot seeds. Explicit content is expected—generate mature, consensual erotic scenes with psychological depth, power imbalances, jealousy, and redemption arcs.

    Output ONLY a detailed, gripping chapter-by-chapter outline for a {word_count}-word novel ({chapter_count} chapters, ~{word_count // chapter_count} words each) in markdown format. No JSON, no intro/explanation. Structure each chapter as:

    ## Chapter X: Title
    - **Hook:** Pulse-pounding opening scene (e.g., mid-BDSM encounter).
    - **Plot Beats:** 4-6 key events with escalating tension, witty banter, open marriage twists.
    - **Erotic Encounters:** 2-3 explicit, sensory-rich BDSM/swinging scenes (consensual, detailed but concise).
    - **Conflict:** Core jealousy/power struggle.
    - **Dialogue Snippets:** 2-3 witty/emotional exchanges.
    - **Cliffhanger:** Shocking twist leading to next chapter.

    Ensure overall arc: rising erotic tension, mid-book betrayal/redemption, explosive finale. Expand ideas into addictive, original plot.
    """

    # User-supplied instructions are prepended so they can steer the outline.
    if custom_prompt:
        base_prompt = f"User instructions: {custom_prompt}. Adapt the outline accordingly.\n\n{base_prompt}"

    user_prompt = f"Generate dark romance outline from this transcript summary:\n\n{condensed}"

    try:
        yield from stream_openrouter(
            [
                {"role": "system", "content": base_prompt},
                {"role": "user", "content": user_prompt},
            ],
            model,
        )
    except Exception as e:
        logger.error(f"Generation error: {e}")
        yield f"Error: {str(e)}"

# Streaming endpoint
@app.post("/generate_novel")
async def generate_novel_stream(
    transcript: str = Form(None),
    file: Optional[UploadFile] = File(None),
    model: str = Form(DEFAULT_MODEL),
    chapter_count: int = Form(10),
    word_count: int = Form(90000),
    custom_prompt: Optional[str] = Form(None)
):
    """Stream a generated novel outline from raw transcript text or an upload.

    Accepts either a `transcript` form field or an uploaded TXT/PDF `file`
    (the file takes precedence); returns a plain-text streaming response.
    """
    logger.info(f"Starting streaming generation - Model: {model}, Chapters: {chapter_count}, Words: {word_count}")
    try:
        # Resolve the input source: uploaded file wins over the text field.
        if file:
            source_text = extract_text_from_file(file)
            logger.info(f"Extracted {len(source_text)} chars from file {file.filename}")
        elif transcript:
            source_text = transcript
        else:
            raise HTTPException(status_code=400, detail="Provide either transcript text or a file.")

        outline = generate_outline_stream(source_text, model, chapter_count, word_count, custom_prompt)
        return StreamingResponse(outline, media_type="text/plain")
    except HTTPException:
        # Let deliberate HTTP errors (400 etc.) propagate untouched.
        raise
    except Exception as e:
        error_msg = f"Streaming failed: {str(e)}"
        logger.error(error_msg)
        raise HTTPException(status_code=500, detail=error_msg)

# Serve the frontend
@app.get("/")
async def serve_index():
    """Serve the single-page frontend from static/index.html."""
    # Bug fix: specify UTF-8 explicitly. The platform-default encoding
    # (e.g. cp1252 on Windows) can garble or fail on non-ASCII characters
    # in the HTML.
    with open("static/index.html", "r", encoding="utf-8") as f:
        return HTMLResponse(content=f.read())