# app/main.py — Podcast-to-Novel FastAPI service (commit f0f4e78)
from fastapi import FastAPI, HTTPException, File, UploadFile, Form
from fastapi.staticfiles import StaticFiles
from fastapi.responses import HTMLResponse, StreamingResponse
from pydantic import BaseModel
import os
import requests
import json
import logging
from typing import Optional, Generator
import PyPDF2
import torch # Moved to top for device check
from transformers import pipeline # For summarization
# Suppress warnings for cleaner logs
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
# OpenRouter API configuration — the key is supplied via environment, never hard-coded.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
# Default chat model; override with the DEFAULT_MODEL environment variable.
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "deepseek/deepseek-r1-0528:free")  # free-tier DeepSeek model
app = FastAPI()
# Logging: DEBUG when LOG_LEVEL=debug, otherwise INFO.
logging.basicConfig(level=logging.DEBUG if os.getenv('LOG_LEVEL') == 'debug' else logging.INFO)
logger = logging.getLogger(__name__)
# Serve the frontend's static assets at /static (directory must exist at startup).
app.mount("/static", StaticFiles(directory="static"), name="static")
# Load the BART summarization pipeline once at startup. On any failure the
# service degrades gracefully: summarizer stays None and downstream code
# uses the full transcript instead.
try:
    device = 0 if torch.cuda.is_available() else -1  # GPU when available, else CPU
    summarizer = pipeline(
        "summarization",
        model="facebook/bart-large-cnn",
        device=device,
    )
    logger.info("BART summarizer loaded successfully")
except Exception as e:
    logger.error(f"Failed to load BART: {e}. Using full transcript.")
    summarizer = None
def extract_text_from_file(file: "UploadFile") -> str:
    """Extract text from an uploaded TXT or PDF file.

    Args:
        file: Uploaded file object; only .txt and .pdf extensions are accepted.

    Returns:
        The file's text content. PDF pages are concatenated with a trailing
        newline after each page.

    Raises:
        HTTPException: 400 for an unsupported extension, 500 when reading or
            decoding the file fails.
    """
    name = file.filename.lower()
    if not name.endswith(('.txt', '.pdf')):
        raise HTTPException(status_code=400, detail="Only TXT or PDF files are supported.")
    try:
        if name.endswith('.txt'):
            return file.file.read().decode('utf-8')
        pdf_reader = PyPDF2.PdfReader(file.file)
        # Bug fix: extract_text() can return None for pages with no extractable
        # text (e.g. scanned images); guard so concatenation never raises TypeError.
        return "".join((page.extract_text() or "") + "\n" for page in pdf_reader.pages)
    except Exception as e:
        logger.error(f"File extraction error: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to read file: {str(e)}")
    finally:
        file.file.close()
def summarize_transcript(transcript: str) -> str:
    """Condense a transcript with the BART summarizer to save LLM tokens.

    Returns the transcript unchanged when the summarizer is unavailable,
    the text is shorter than 100 characters, or summarization raises.
    """
    if not summarizer or len(transcript) < 100:
        return transcript
    try:
        # BART tops out around ~1024 tokens, so process 1000-char windows
        # and stitch the per-window summaries back together.
        parts = []
        for start in range(0, len(transcript), 1000):
            piece = transcript[start:start + 1000]
            if len(piece) > 50:
                result = summarizer(piece, max_length=150, min_length=30, do_sample=False)
                parts.append(result[0]['summary_text'])
        condensed = " ".join(parts)
        logger.info(f"Summarized {len(transcript)} chars to {len(condensed)} chars")
        return condensed
    except Exception as e:
        logger.error(f"Summarization failed: {e}")
        return transcript
def stream_openrouter(messages: list, model: str) -> Generator[str, None, None]:
    """Stream chat-completion content deltas from the OpenRouter API.

    Args:
        messages: Chat messages as {"role": ..., "content": ...} dicts.
        model: OpenRouter model identifier to route the request to.

    Yields:
        Text chunks (deltas) as they arrive over the SSE stream.

    Raises:
        ValueError: If OPENROUTER_API_KEY is not configured.
        HTTPException: With the upstream status code on a non-200 response,
            or 500 for network/streaming failures.
    """
    if not OPENROUTER_API_KEY:
        raise ValueError("OPENROUTER_API_KEY not set")
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://huggingface.co/spaces/NoLev/PodcastToNovel",
        "X-Title": "Podcast to Novel Converter"
    }
    payload = {
        "model": model,
        "messages": messages,
        "temperature": 0.7,
        "max_tokens": 8000,
        "stream": True
    }
    try:
        logger.info(f"Streaming OpenRouter with model: {model}")
        with requests.post(OPENROUTER_API_URL, headers=headers, json=payload, stream=True, timeout=120) as response:
            if response.status_code != 200:
                raise HTTPException(status_code=response.status_code, detail=f"OpenRouter error: {response.text}")
            full_content = ""
            # OpenRouter streams Server-Sent Events: each "data: " line carries
            # a JSON chunk, and the literal "[DONE]" sentinel ends the stream.
            for line in response.iter_lines():
                if not line:
                    continue
                decoded = line.decode('utf-8')
                if not decoded.startswith('data: '):
                    continue
                data = decoded[6:]
                if data == '[DONE]':
                    break
                try:
                    chunk = json.loads(data)
                    delta = chunk.get("choices", [{}])[0].get("delta", {}).get("content", "")
                    if delta:
                        full_content += delta
                        yield delta  # hand the chunk straight to the HTTP response
                except json.JSONDecodeError:
                    continue  # skip keep-alive / malformed SSE lines
            logger.info(f"Streaming complete: {len(full_content)} chars")
    except HTTPException:
        # Bug fix: HTTPException subclasses Exception, so the generic handler
        # below used to re-wrap the deliberate non-200 error as a 500 and lose
        # the upstream status code. Re-raise it unchanged instead.
        raise
    except Exception as e:
        logger.error(f"Streaming failed: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to stream: {str(e)}")
def generate_outline_stream(transcript: str, model: str, chapter_count: int, word_count: int, custom_prompt: Optional[str]) -> Generator[str, None, None]:
    """Generate and stream a novel outline from a (summarized) transcript.

    Args:
        transcript: Raw podcast transcript text.
        model: OpenRouter model identifier.
        chapter_count: Requested number of chapters (values < 1 are clamped to 1).
        word_count: Target total word count for the novel.
        custom_prompt: Optional extra instructions prepended to the system prompt.

    Yields:
        Outline text chunks, or a single "Error: ..." string on failure.
    """
    if not transcript:
        yield "Error: Transcript is empty"
        return
    summarized = summarize_transcript(transcript)
    # Bug fix: clamp to >= 1 so a client posting chapter_count=0 no longer
    # triggers ZeroDivisionError while the prompt is being built.
    chapters = max(chapter_count, 1)
    # Build dynamic system prompt
    base_prompt = f"""
You are a creative novelist specializing in dark romance novels featuring open marriages, BDSM, and swinging lifestyles. Analyze the provided podcast transcript summary for themes, dialogues, character dynamics, and plot seeds. Explicit content is expected—generate mature, consensual erotic scenes with psychological depth, power imbalances, jealousy, and redemption arcs.
Output ONLY a detailed, gripping chapter-by-chapter outline for a {word_count}-word novel ({chapters} chapters, ~{word_count // chapters} words each) in markdown format. No JSON, no intro/explanation. Structure each chapter as:
## Chapter X: Title
- **Hook:** Pulse-pounding opening scene (e.g., mid-BDSM encounter).
- **Plot Beats:** 4-6 key events with escalating tension, witty banter, open marriage twists.
- **Erotic Encounters:** 2-3 explicit, sensory-rich BDSM/swinging scenes (consensual, detailed but concise).
- **Conflict:** Core jealousy/power struggle.
- **Dialogue Snippets:** 2-3 witty/emotional exchanges.
- **Cliffhanger:** Shocking twist leading to next chapter.
Ensure overall arc: rising erotic tension, mid-book betrayal/redemption, explosive finale. Expand ideas into addictive, original plot.
"""
    if custom_prompt:
        base_prompt = f"User instructions: {custom_prompt}. Adapt the outline accordingly.\n\n{base_prompt}"
    user_prompt = f"Generate dark romance outline from this transcript summary:\n\n{summarized}"
    messages = [
        {"role": "system", "content": base_prompt},
        {"role": "user", "content": user_prompt}
    ]
    try:
        for chunk in stream_openrouter(messages, model):
            yield chunk
    except Exception as e:
        logger.error(f"Generation error: {e}")
        yield f"Error: {str(e)}"
# Streaming endpoint: accepts either raw transcript text or an uploaded file.
@app.post("/generate_novel")
async def generate_novel_stream(
    transcript: str = Form(None),
    file: Optional[UploadFile] = File(None),
    model: str = Form(DEFAULT_MODEL),
    chapter_count: int = Form(10),
    word_count: int = Form(90000),
    custom_prompt: Optional[str] = Form(None)
):
    """Kick off outline generation and stream the result as plain text."""
    logger.info(f"Starting streaming generation - Model: {model}, Chapters: {chapter_count}, Words: {word_count}")
    try:
        # Prefer the uploaded file when both inputs are supplied.
        if file:
            source_text = extract_text_from_file(file)
            logger.info(f"Extracted {len(source_text)} chars from file {file.filename}")
        elif transcript:
            source_text = transcript
        else:
            raise HTTPException(status_code=400, detail="Provide either transcript text or a file.")
        stream = generate_outline_stream(source_text, model, chapter_count, word_count, custom_prompt)
        return StreamingResponse(stream, media_type="text/plain")
    except HTTPException:
        raise
    except Exception as e:
        error_msg = f"Streaming failed: {str(e)}"
        logger.error(error_msg)
        raise HTTPException(status_code=500, detail=error_msg)
# Serve the single-page frontend.
@app.get("/")
async def serve_index():
    """Return the static index page as HTML."""
    # Bug fix: read explicitly as UTF-8 so the page renders identically
    # regardless of the host's locale-dependent default encoding.
    with open("static/index.html", "r", encoding="utf-8") as f:
        return HTMLResponse(content=f.read())