Rakshitjan committed on
Commit
f6a1998
·
verified ·
1 Parent(s): d3d5e83

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +311 -0
main.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py
2
+ from fastapi import FastAPI, File, UploadFile, HTTPException, BackgroundTasks
3
+ from fastapi.responses import JSONResponse, StreamingResponse
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ import google.generativeai as genai
6
+ import pdfplumber
7
+ import json
8
+ import re
9
+ import os
10
+ import io
11
+ from gtts import gTTS
12
+ from pydub import AudioSegment
13
+ import uuid
14
+ import asyncio
15
+ from pydantic import BaseModel
16
+ from typing import Dict, List, Optional
17
+ import shutil
18
+ import tempfile
19
+
20
app = FastAPI(title="PDF to Audio Converter")

# Configure CORS.
# Allowed origins come from the comma-separated ALLOWED_ORIGINS environment
# variable; when it is unset or contains "*", every origin is allowed.
_allowed_origins = [
    origin.strip()
    for origin in os.environ.get("ALLOWED_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]
_allow_all_origins = "*" in _allowed_origins

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"] if _allow_all_origins else _allowed_origins,
    # Credentialed cross-origin requests are only meaningful with an explicit
    # origin list, so credentials stay off while the wildcard is in use.
    allow_credentials=not _allow_all_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
30
+
31
# In-process registry of job state, keyed by job id. Every value is a
# JobStatus instance that the handlers and the background worker overwrite
# as the job advances.
job_status: Dict[str, "JobStatus"] = dict()
33
+
34
class JobStatus(BaseModel):
    """Snapshot of one conversion job as stored in ``job_status``."""

    # Identifier handed back to the client by the upload endpoint.
    job_id: str
    # Values written in this file: "uploaded", "processing", "complete", "error".
    status: str
    # Percentage-style progress figure (0-100 as written by the worker).
    progress: int
    # Optional human-readable description of the current step or failure.
    message: Optional[str] = None
    # Download path; only set by the worker once the job is complete.
    result_url: Optional[str] = None
40
+
41
@app.on_event("startup")
async def startup_event():
    """Prepare the working directory and the Gemini client at startup.

    Ensures the ``temp`` directory exists, then configures the Gemini SDK
    from ``GOOGLE_API_KEY``. A missing key only produces a warning.
    """
    # Working directory for uploads and generated audio.
    os.makedirs("temp", exist_ok=True)

    key = os.environ.get("GOOGLE_API_KEY")
    if key:
        genai.configure(api_key=key)
        return

    print("Warning: GOOGLE_API_KEY not found. API functionality will be limited.")
52
+
53
def extract_text_from_pdf(file_path):
    """Return the text of every page in the PDF, one newline after each page.

    Pages for which pdfplumber yields no text (``None`` or an empty string)
    contribute nothing to the result.
    """
    with pdfplumber.open(file_path) as document:
        per_page = [page.extract_text() for page in document.pages]

    return "".join(f"{chunk}\n" for chunk in per_page if chunk)
62
+
63
def _parse_conversation_json(raw_text):
    """Convert the model's raw reply into a list of ``{speaker: line}`` dicts.

    Raises:
        ValueError: if the reply is not valid JSON, or is valid JSON but not
            a list of objects.
    """
    # Strip surrounding whitespace FIRST: a trailing newline after a closing
    # ``` fence would otherwise protect the fence from being removed.
    cleaned_text = raw_text.strip().strip("`").strip()
    cleaned_text = re.sub(r"^json", "", cleaned_text, flags=re.IGNORECASE).strip()

    # Fix common JSON issues: drop a trailing comma before ] or }.
    cleaned_text = re.sub(r",\s*([\]}])", r"\1", cleaned_text)

    try:
        parsed_json = json.loads(cleaned_text)
    except json.JSONDecodeError as e:
        print(f"JSON Parse Error: {e}")
        print(f"Problem text: {cleaned_text}")
        raise ValueError(f"Failed to parse generated conversation: {str(e)}") from e

    # The caller iterates the result as a list of dicts; fail here with a
    # clear message rather than with an AttributeError further down.
    if not isinstance(parsed_json, list) or not all(
        isinstance(turn, dict) for turn in parsed_json
    ):
        raise ValueError(
            "Failed to parse generated conversation: expected a JSON list of objects"
        )
    return parsed_json


async def generate_conversation(pdf_text):
    """Generate an Emily/Bob conversation from PDF text using Gemini.

    Args:
        pdf_text: Text extracted from the uploaded PDF; it is embedded
            verbatim in the prompt.

    Returns:
        A list of single-entry dicts mapping a speaker name to that
        speaker's line, in conversation order.

    Raises:
        ValueError: if ``GOOGLE_API_KEY`` is not set or the model reply
            cannot be parsed into the expected structure.
        Exception: any error raised by the Gemini client is logged and
            re-raised unchanged.
    """
    try:
        api_key = os.environ.get("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY environment variable not set")

        model = genai.GenerativeModel('gemini-2.5-pro-exp-03-25')

        output_format = """
        [
        {"Emily": "..."},
        {"Bob": "..."},
        {"Emily": "..."},
        {"Bob": "..."}
        ]
        """

        query = f"""
        You are the expert conversation generator for the JEE student based on provided inputs. Your task is to
        generate the incentive conversation between Emily and her friend Bob explaining ALL the concepts to each others in *DETAILS*.

        The content to use to generate the conversations:
        {pdf_text}
        -----------------------------------------------------------------------

        **NOTE**:
        - Do not include ```json anywhere.
        - All points in the given content should be explained with details in output conversation.
        - **Some dialog should contain filler words only**. Do not limit the conversation.
        - The conversation should include filler words such as umm, yahh, etc. at proper places specially for Emily.
        - The conversation will be read by tts so make it very easy and accurate to read.
        - The formulas should be accurately read by tts.
        - It should include pauses, emphasizes, and similar emotions.
        - All the topics in the given content should be covered with better and detailed explanations in the output discussion.
        - Make conversation with significant length so that all the concepts should be covered without fail.
        - The listener should understand the concepts in the given content easily by listening to the conversation between Bob and Emily.
        - The conversation should be filled with pleasure, emotions, and all.
        - All contents given to you should be completely explained to listener by hearing the conversations.

        The output format should strictly follow this output format:
        {output_format}

        Strictly follow the provided output format and do *not* include extra intro or '''dot heading.
        Output Format Rules:
        Rules:
        1. **Ensure the JSON is syntactically correct** before responding.
        2. Do not include markdown (```json).
        3. Verify there are no extra commas, missing brackets, or incorrect types.
        4. Respond **only with the JSON** (no explanations)
        """

        # generate_content is a blocking network call; run it in a worker
        # thread so the event loop keeps serving other requests meanwhile.
        response = await asyncio.to_thread(model.generate_content, query)

        return _parse_conversation_json(response.text)
    except Exception as e:
        print(f"Error generating conversation: {str(e)}")
        raise
135
+
136
def generate_female_voice(text, filename):
    """Synthesize ``text`` with gTTS, save it to ``filename`` and return the audio.

    The saved file is read back and returned as a pydub ``AudioSegment``.
    """
    gTTS(text=text, lang='en').save(filename)
    segment = AudioSegment.from_file(filename)
    return segment
141
+
142
def generate_male_voice(text, filename):
    """Generate a lower-pitched voice for ``text`` and save it to ``filename``.

    The text is synthesized with gTTS into a temporary file next to
    ``filename``. The samples are then re-labelled with 85% of the original
    frame rate and converted back to the original rate before export.

    Args:
        text: The line to speak.
        filename: Destination path of the exported mp3.

    Returns:
        The pitch-shifted pydub ``AudioSegment`` that was exported.
    """
    temp_file = f"{filename}_temp.mp3"
    try:
        tts = gTTS(text=text, lang='en')
        tts.save(temp_file)

        sound = AudioSegment.from_file(temp_file)
        # Reinterpret the raw samples at a lower frame rate, then resample
        # to the original rate so the result matches the other clips.
        lower_pitch = sound._spawn(sound.raw_data, overrides={
            "frame_rate": int(sound.frame_rate * 0.85)
        }).set_frame_rate(sound.frame_rate)

        lower_pitch.export(filename, format="mp3")
        return lower_pitch
    finally:
        # Always clean up the intermediate file, including when synthesis,
        # decoding or export raised.
        if os.path.exists(temp_file):
            os.remove(temp_file)
156
+
157
async def process_pdf_to_audio(job_id: str, file_path: str):
    """Convert an uploaded PDF into a two-voice podcast, reporting progress.

    Steps: extract text, generate the conversation, synthesize each line,
    concatenate the clips with pauses, and export
    ``temp/<job_id>/final_podcast.mp3``. Progress and the final outcome are
    written to ``job_status[job_id]``. Any exception is caught and recorded
    there as an "error" status rather than propagated.

    Blocking work (PDF parsing, TTS, audio export) runs in worker threads so
    that the event loop stays free to answer status requests during the job.

    Args:
        job_id: Identifier of the job; also names its output directory.
        file_path: Path of the uploaded PDF.
    """

    def set_status(status, progress, message, result_url=None):
        # Single place that records the job's current state.
        job_status[job_id] = JobStatus(job_id=job_id, status=status, progress=progress,
                                       message=message, result_url=result_url)

    try:
        # Extract text from PDF
        set_status("processing", 10, "Extracting text from PDF...")
        pdf_text = await asyncio.to_thread(extract_text_from_pdf, file_path)
        if not pdf_text.strip():
            set_status("error", 0, "No text extracted from PDF")
            return

        # Generate conversation
        set_status("processing", 30, "Generating conversation...")
        conversation = await generate_conversation(pdf_text)

        # Create temp directory for audio files
        output_dir = f"temp/{job_id}"
        os.makedirs(output_dir, exist_ok=True)

        # Generate audio for each line
        set_status("processing", 50, "Generating voices...")

        speaker_voice_map = {
            "Emily": "female",
            "Bob": "male"
        }

        final_podcast = AudioSegment.silent(duration=1000)  # 1 sec silence at start
        pause = AudioSegment.silent(duration=500)  # gap between lines

        total_lines = len(conversation)
        for i, line_dict in enumerate(conversation):
            for speaker, line in line_dict.items():
                spoken_text = str(line).strip()
                if not spoken_text:
                    # gTTS refuses empty text; skip the turn instead of
                    # failing the whole job.
                    continue

                # Speaker names come from the model output; keep only
                # filename-safe characters before using them in a path.
                safe_speaker = re.sub(r"[^A-Za-z0-9_-]", "_", str(speaker))
                filename = f"{output_dir}/{i}_{safe_speaker}.mp3"

                # Unknown speakers fall back to the female voice.
                if speaker_voice_map.get(speaker, "female") == "female":
                    synthesize = generate_female_voice
                else:
                    synthesize = generate_male_voice
                voice = await asyncio.to_thread(synthesize, spoken_text, filename)

                final_podcast += voice + pause

            # Update progress (50% to 90%)
            progress = 50 + int(40 * (i+1) / total_lines)
            set_status("processing", progress, f"Processing dialogue {i+1}/{total_lines}")

        # Export final audio
        output_filename = f"temp/{job_id}/final_podcast.mp3"
        set_status("processing", 95, "Exporting final audio...")
        await asyncio.to_thread(final_podcast.export, output_filename, format="mp3")

        # Complete job
        set_status("complete", 100, "Processing complete", f"/download/{job_id}")

    except Exception as e:
        # Top-level boundary of the background task: record the failure so
        # the client can see it via the status endpoint.
        print(f"Error processing job {job_id}: {str(e)}")
        set_status("error", 0, f"Error: {str(e)}")
226
+
227
@app.post("/upload/")
async def upload_file(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """Upload a PDF file and start converting it in the background.

    Returns:
        A dict with the new ``job_id`` and a confirmation message.

    Raises:
        HTTPException: 400 if the upload does not have a ``.pdf`` filename;
            500 if saving the file or scheduling the job fails.
    """
    # Validate before the try block so the 400 below is not caught by the
    # generic handler and turned into a 500. The filename may be missing.
    filename = file.filename or ""
    if not filename.lower().endswith('.pdf'):
        raise HTTPException(status_code=400, detail="File must be a PDF")

    try:
        # Generate a job ID
        job_id = str(uuid.uuid4())

        # Save uploaded file
        temp_file_path = f"temp/{job_id}_upload.pdf"
        with open(temp_file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Initialize job status
        job_status[job_id] = JobStatus(job_id=job_id, status="uploaded", progress=5,
                                       message="File uploaded, starting processing")

        # Process in background
        background_tasks.add_task(process_pdf_to_audio, job_id, temp_file_path)

        return {"job_id": job_id, "message": "File uploaded successfully. Processing started."}

    except HTTPException:
        # Preserve deliberate HTTP errors instead of re-wrapping them as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
254
+
255
@app.get("/status/{job_id}")
async def get_job_status(job_id: str):
    """Return the stored status record for ``job_id``; 404 if it is unknown."""
    try:
        return job_status[job_id]
    except KeyError:
        raise HTTPException(status_code=404, detail="Job not found") from None
262
+
263
@app.get("/download/{job_id}")
async def download_audio(job_id: str):
    """Stream the finished podcast for ``job_id`` as an mp3 attachment.

    Responds with 404 when the job is unknown, not yet complete, or its
    output file is missing on disk.
    """
    record = job_status.get(job_id)
    if record is None or record.status != "complete":
        raise HTTPException(status_code=404, detail="Audio not ready or job not found")

    file_path = f"temp/{job_id}/final_podcast.mp3"
    if not os.path.exists(file_path):
        raise HTTPException(status_code=404, detail="File not found")

    def iterfile():
        # Generator feeding the response body straight from the file.
        with open(file_path, mode="rb") as audio:
            for piece in audio:
                yield piece

    disposition = {"Content-Disposition": f"attachment; filename=podcast_{job_id}.mp3"}
    return StreamingResponse(iterfile(), media_type="audio/mpeg", headers=disposition)
282
+
283
@app.delete("/job/{job_id}")
async def delete_job(job_id: str):
    """Remove a job's files and its status entry; 404 if the job is unknown."""
    if job_id not in job_status:
        raise HTTPException(status_code=404, detail="Job not found")

    # Per-job output directory, then the originally uploaded PDF.
    audio_dir = f"temp/{job_id}"
    if os.path.exists(audio_dir):
        shutil.rmtree(audio_dir)

    uploaded_pdf = f"temp/{job_id}_upload.pdf"
    if os.path.exists(uploaded_pdf):
        os.remove(uploaded_pdf)

    # Forget the job once its files are gone.
    del job_status[job_id]

    return {"message": "Job deleted successfully"}
303
+
304
@app.get("/health")
async def health_check():
    """Liveness probe: always reports a healthy status."""
    payload = {"status": "healthy"}
    return payload
308
+
309
if __name__ == "__main__":
    # Direct-execution entry point: serve the app with uvicorn on all
    # interfaces, port 7860, with auto-reload enabled.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 7860, "reload": True}
    uvicorn.run("main:app", **server_options)