Spaces:

Rakshitjan
/

pdfPodcastPrototype

Build error

App Files Files Community

pdfPodcastPrototype / main.py

Rakshitjan

Update main.py

e83f774 verified 10 months ago

raw

history blame contribute delete

5.73 kB

	from fastapi import FastAPI, File, UploadFile, HTTPException
	from fastapi.responses import StreamingResponse
	from fastapi.middleware.cors import CORSMiddleware
	import google.generativeai as genai
	import pdfplumber
	import json
	import re
	import os
	import tempfile
	import shutil
	from gtts import gTTS
	from pydub import AudioSegment
	from io import BytesIO

	# Application instance; every route below is registered on it.
	app = FastAPI()

	# CORS: any origin, method and header is accepted, with credentials enabled.
	# NOTE(review): wildcard origins together with allow_credentials=True is very
	# permissive — confirm this is intended before exposing the service publicly.
	_cors_options = {
	    "allow_origins": ["*"],
	    "allow_credentials": True,
	    "allow_methods": ["*"],
	    "allow_headers": ["*"],
	}
	app.add_middleware(CORSMiddleware, **_cors_options)

	@app.on_event("startup")
	def startup_event():
	    """Configure the Gemini client from the environment at application startup.

	    Reads ``GOOGLE_API_KEY``. When it is unset or empty a warning is printed
	    and the client is left unconfigured; startup itself still succeeds.
	    """
	    key = os.environ.get("GOOGLE_API_KEY")
	    if not key:
	        print("Warning: GOOGLE_API_KEY not found")
	        return
	    genai.configure(api_key=key)

	def extract_text_from_pdf(file_bytes):
	    """Return the concatenated text of all pages of a PDF.

	    ``file_bytes`` is handed straight to ``pdfplumber.open``. Pages whose
	    ``extract_text()`` result is empty or ``None`` are skipped; the text of
	    every other page is followed by a single newline.
	    """
	    with pdfplumber.open(file_bytes) as document:
	        extracted = (page.extract_text() for page in document.pages)
	        # Consume the generator while the PDF is still open.
	        return "".join(chunk + "\n" for chunk in extracted if chunk)

	def generate_conversation(pdf_text, model_name='gemini-2.5-pro-exp-03-25'):
	    """Ask Gemini to turn PDF text into an Emily/Bob dialogue.

	    Args:
	        pdf_text: Source material the dialogue must cover; it is interpolated
	            verbatim into the prompt.
	        model_name: Identifier passed to ``genai.GenerativeModel``. Defaults
	            to the model this service has always used; exposed so callers can
	            switch models without editing the prompt code.

	    Returns:
	        A list of single-entry dicts mapping a speaker name to the line that
	        speaker says, in conversation order.

	    Raises:
	        ValueError: If the model reply cannot be parsed as JSON, or parses to
	            something other than a list of dicts.
	    """
	    model = genai.GenerativeModel(model_name)

	    output_format = """
	[
	{"Emily": "..."},
	{"Bob": "..."}
	]
	"""

	    query = f"""
	You are the expert conversation generator for the JEE student based on provided inputs.Your task is to
	generate the incentive conversation between Emily and her friend Bob explaining ALL the concepts to each others in DETAILS.

	The content to use to generate the conversations:
	{pdf_text}
	-----------------------------------------------------------------------

	NOTE:
	- Do not include ```json anywere.
	- All points in the givent content should be explained with details in output conversation.
	- Some dialog should contain filler words only.Do not limit the conversation.
	- The conversation should inlcudes filler words such as umm, yahh,etc. at proper places specially for Emily.
	- The conversation will be read by tts so make it very easy and accurate to read.
	- The formulas should be accuratly read by tts.
	- It should include pauses, emphasizes, and similar emotions.
	- All the topics in the given content should be coverd with bettere and detailed explanations in the output disscusion.
	- Make conversation with significant length so that all the concepts should be covered without fail.
	- The listner should understand the concepts in the given content easily by listening to the conversation between Bob and Emily.
	- The conversation should be filled with pleasure , emotions, and all.
	- All contents given to you should be completly explained to listner by hering the convesations.

	The output format should strictly follow this output format:
	{output_format}

	Strictly follow the provided output format and do not include extra intro or '''dot heading.
	OutPut Format Rules :
	Rules:
	1. Ensure the JSON is syntactically correct before responding.
	2. Do not include markdown (```json).
	3. Verify there are no extra commas, missing brackets, or incorrect types.
	4. Respond only with the JSON (no explanations)
	"""

	    response = model.generate_content(query)
	    return _parse_conversation(response.text)


	def _parse_conversation(raw_text):
	    """Parse the model's raw reply into a list of ``{speaker: line}`` dicts.

	    Strips an optional Markdown code fence and ``json`` language tag, then
	    decodes the remainder as JSON. Raises ``ValueError`` when decoding fails
	    or the decoded value is not a list of dicts.
	    """
	    # Trim whitespace BEFORE the backticks: a reply ending in "```\n" would
	    # otherwise keep its closing fence and fail to decode.
	    cleaned = raw_text.strip().strip("`").strip()
	    cleaned = re.sub(r"^json", "", cleaned, flags=re.IGNORECASE).strip()

	    # Decode the text untouched first. The trailing-comma repair is a plain
	    # regex that cannot tell JSON syntax from dialogue text, so it is only
	    # used as a fallback when strict decoding fails.
	    candidates = (cleaned, re.sub(r",\s*([\]}])", r"\1", cleaned))
	    last_error = None
	    for candidate in candidates:
	        try:
	            parsed = json.loads(candidate)
	            break
	        except json.JSONDecodeError as exc:
	            last_error = exc
	    else:
	        print(f"JSON Error: {last_error}")
	        raise ValueError("Failed to parse generated conversation") from last_error

	    # Downstream code iterates the result and calls .items() on each entry,
	    # so reject any other shape here with a clear error.
	    if not isinstance(parsed, list) or not all(isinstance(turn, dict) for turn in parsed):
	        print(f"JSON Error: expected a list of objects, got {type(parsed).__name__}")
	        raise ValueError("Failed to parse generated conversation")
	    return parsed

	def create_audio_stream(conversation):
	    """Synthesize a conversation into one MP3 and return it as a ``BytesIO``.

	    Args:
	        conversation: Iterable of dicts mapping a speaker name to the line
	            that speaker says. "Bob" is rendered with the pitch-lowered
	            voice; "Emily" and any unknown speaker use the unmodified voice.

	    Returns:
	        A ``BytesIO`` rewound to position 0 that holds the MP3 data: one
	        second of leading silence, then each spoken line followed by half a
	        second of silence.
	    """
	    def synthesize(text):
	        """Render *text* with gTTS and load the result as an AudioSegment."""
	        buf = BytesIO()
	        gTTS(text=text, lang='en').write_to_fp(buf)
	        buf.seek(0)
	        return AudioSegment.from_file(buf, format="mp3")

	    def generate_female_voice(text):
	        """Return the unmodified gTTS rendering of *text*."""
	        return synthesize(text)

	    def generate_male_voice(text):
	        """Return a pitch-lowered rendering of *text*."""
	        sound = synthesize(text)
	        # Reinterpret the same samples at 85% of the frame rate (slower and
	        # lower-pitched playback), then convert back to the original rate.
	        lowered = sound._spawn(sound.raw_data, overrides={
	            "frame_rate": int(sound.frame_rate * 0.85)
	        })
	        return lowered.set_frame_rate(sound.frame_rate)

	    voice_renderers = {"Emily": generate_female_voice, "Bob": generate_male_voice}
	    pause = AudioSegment.silent(duration=500)  # built once, reused after each line
	    final_audio = AudioSegment.silent(duration=1000)

	    for line_dict in conversation:
	        for speaker, line in line_dict.items():
	            # Skip empty or non-text lines so a single blank entry cannot
	            # abort the whole request inside the TTS call.
	            if not isinstance(line, str) or not line.strip():
	                continue
	            render = voice_renderers.get(speaker, generate_female_voice)
	            final_audio += render(line) + pause

	    output_bytes = BytesIO()
	    final_audio.export(output_bytes, format="mp3")
	    output_bytes.seek(0)
	    return output_bytes

	@app.post("/convert/")
	async def convert_pdf_to_audio(file: UploadFile = File(...)):
	    """Convert an uploaded PDF into a two-voice MP3 conversation.

	    Args:
	        file: The uploaded PDF.

	    Returns:
	        A ``StreamingResponse`` with media type ``audio/mpeg`` served as an
	        attachment named ``audio_<upload name stem>.mp3``.

	    Raises:
	        HTTPException: 400 when no text could be extracted from the PDF;
	            500 (with the error message as detail) for any other failure.
	    """
	    # Function-scope import keeps this handler self-contained; fastapi
	    # re-exports Starlette's thread-pool helper.
	    from fastapi.concurrency import run_in_threadpool

	    try:
	        file_bytes = BytesIO(await file.read())
	        # PDF parsing, the LLM call and TTS synthesis are all blocking, so
	        # they run in the thread pool instead of stalling the event loop.
	        text = await run_in_threadpool(extract_text_from_pdf, file_bytes)
	        if not text.strip():
	            raise HTTPException(status_code=400, detail="No text extracted from PDF")
	        conversation = await run_in_threadpool(generate_conversation, text)
	        audio_stream = await run_in_threadpool(create_audio_stream, conversation)

	        # The upload name is client-controlled and may be missing; keep only
	        # header-safe characters before placing it in Content-Disposition.
	        stem = (file.filename or "").split('.')[0]
	        safe_stem = re.sub(r"[^A-Za-z0-9_-]+", "_", stem)
	        return StreamingResponse(
	            audio_stream,
	            media_type="audio/mpeg",
	            headers={"Content-Disposition": f"attachment; filename=audio_{safe_stem}.mp3"},
	        )
	    except HTTPException:
	        # Deliberate HTTP errors (such as the 400 above) must keep their
	        # status code rather than being rewrapped as a 500.
	        raise
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=str(e)) from e

	@app.get("/health")
	def health_check():
	    """Liveness probe: unconditionally reports the service as healthy."""
	    payload = {"status": "healthy"}
	    return payload

	if __name__ == "__main__":
	    # Direct execution: serve the app with uvicorn on all interfaces,
	    # port 7860, with auto-reload enabled.
	    from uvicorn import run as serve

	    serve("main:app", host="0.0.0.0", port=7860, reload=True)