Spaces:
Build error
Build error
import json
import os
import re
import shutil
import tempfile
from io import BytesIO

import google.generativeai as genai
import pdfplumber
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from gtts import gTTS
from pydub import AudioSegment
# FastAPI application object for this service.
app = FastAPI()

# CORS: accept cross-origin requests from any origin, with any method and any
# header.  Credentialed requests are disabled: a wildcard origin must not be
# combined with allow_credentials=True, and nothing in this service relies on
# cookies or other browser-held credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
def startup_event():
    """Configure the Gemini client from the ``GOOGLE_API_KEY`` variable.

    Reads ``GOOGLE_API_KEY`` from the process environment and passes it to
    ``genai.configure``.  When the variable is missing or empty, a warning is
    printed and the client is left unconfigured.

    NOTE(review): no startup-hook registration for this function is visible
    in this part of the file -- confirm that it is actually invoked.
    """
    key = os.environ.get("GOOGLE_API_KEY")
    if not key:
        print("Warning: GOOGLE_API_KEY not found")
        return
    genai.configure(api_key=key)
def extract_text_from_pdf(file_bytes):
    """Return the concatenated text of all pages of a PDF.

    Args:
        file_bytes: Whatever ``pdfplumber.open`` accepts; the upload handler
            in this file passes a ``BytesIO`` holding the document.

    Returns:
        The text of every page that yielded any, each followed by a newline,
        in page order.  An empty string when no page produced text.
    """
    with pdfplumber.open(file_bytes) as document:
        page_texts = (page.extract_text() for page in document.pages)
        # Pages without extractable text return a falsy value and are skipped.
        return "".join(chunk + "\n" for chunk in page_texts if chunk)
def _strip_code_fences(raw_text):
    """Remove a surrounding Markdown code fence and ``json`` tag, if present."""
    # Trim whitespace *before* the backticks: a reply such as "```json\n[...]\n```\n"
    # ends in a newline, which would otherwise shield the closing fence.
    unfenced = raw_text.strip().strip("`").strip()
    return re.sub(r"^json", "", unfenced, flags=re.IGNORECASE).strip()


def generate_conversation(pdf_text):
    """Ask Gemini to turn study material into an Emily/Bob dialogue.

    Args:
        pdf_text: Source material that is embedded verbatim in the prompt.

    Returns:
        The parsed JSON reply.  The prompt asks for a list of single-key
        objects mapping a speaker name to one utterance, but the shape is not
        validated here.

    Raises:
        ValueError: If the reply cannot be parsed as JSON; the underlying
            ``json.JSONDecodeError`` is attached as the cause.
    """
    model = genai.GenerativeModel('gemini-2.5-pro-exp-03-25')
    output_format = """
[
{"Emily": "..."},
{"Bob": "..."}
]
"""
    # The prompt wording is runtime text sent to the model and is kept verbatim.
    query = f"""
You are the expert conversation generator for the JEE student based on provided inputs.Your task is to
generate the incentive conversation between Emily and her friend Bob explaining ALL the concepts to each others in *DETAILS*.
The content to use to generate the conversations:
{pdf_text}
-----------------------------------------------------------------------
**NOTE**:
- Do not include ```json anywere.
- All points in the givent content should be explained with details in output conversation.
- **Some dialog should contain filler words only**.Do not limit the conversation.
- The conversation should inlcudes filler words such as umm, yahh,etc. at proper places specially for Emily.
- The conversation will be read by tts so make it very easy and accurate to read.
- The formulas should be accuratly read by tts.
- It should include pauses, emphasizes, and similar emotions.
- All the topics in the given content should be coverd with bettere and detailed explanations in the output disscusion.
- Make conversation with significant length so that all the concepts should be covered without fail.
- The listner should understand the concepts in the given content easily by listening to the conversation between Bob and Emily.
- The conversation should be filled with pleasure , emotions, and all.
- All contents given to you should be completly explained to listner by hering the convesations.
The output format should strictly follow this output format:
{output_format}
Strictly follow the provided output format and do *not* include extra intro or '''dot heading.
OutPut Format Rules :
Rules:
1. **Ensure the JSON is syntactically correct** before responding.
2. Do not include markdown (```json).
3. Verify there are no extra commas, missing brackets, or incorrect types.
4. Respond **only with the JSON** (no explanations)
"""
    response = model.generate_content(query)

    cleaned_text = _strip_code_fences(response.text)
    # Drop trailing commas before a closing bracket/brace.  The pattern runs
    # over the whole reply, so it also rewrites a matching sequence that
    # happens to sit inside a string value.
    cleaned_text = re.sub(r",\s*([\]}])", r"\1", cleaned_text)

    try:
        return json.loads(cleaned_text)
    except json.JSONDecodeError as err:
        print(f"JSON Error: {err}")
        raise ValueError("Failed to parse generated conversation") from err
# Timing and voice parameters for the rendered dialogue.
_LEAD_IN_MS = 1000          # silence before the first utterance
_TURN_GAP_MS = 500          # silence appended after every utterance
_MALE_PITCH_FACTOR = 0.85   # frame-rate multiplier used for the "male" voice


def _synthesize_speech(text):
    """Render *text* with gTTS and decode the resulting MP3 with pydub."""
    mp3_buffer = BytesIO()
    gTTS(text=text, lang='en').write_to_fp(mp3_buffer)
    mp3_buffer.seek(0)
    return AudioSegment.from_file(mp3_buffer, format="mp3")


def _lower_pitch(sound, factor):
    """Reinterpret *sound* at ``factor`` times its frame rate, then resample."""
    # Re-labelling the same raw samples with a reduced frame rate changes how
    # they play back; set_frame_rate then converts the result back to the
    # original rate so every segment in the mix shares one frame rate.
    relabelled = sound._spawn(
        sound.raw_data,
        overrides={"frame_rate": int(sound.frame_rate * factor)},
    )
    return relabelled.set_frame_rate(sound.frame_rate)


def create_audio_stream(conversation):
    """Render a scripted dialogue to a single in-memory MP3.

    Args:
        conversation: Iterable of dicts mapping a speaker name to the line
            that speaker says.  "Bob" gets the pitch-shifted voice; "Emily"
            and any unknown speaker get the unmodified gTTS voice.

    Returns:
        A ``BytesIO`` positioned at offset 0 that holds the MP3 data: a
        lead-in silence followed by each utterance and a short pause.

    Utterances that are not strings, or that are empty or whitespace-only, are
    skipped so one bad entry does not abort the whole render.
    """
    speaker_voice_map = {"Emily": "female", "Bob": "male"}
    final_audio = AudioSegment.silent(duration=_LEAD_IN_MS)
    turn_gap = AudioSegment.silent(duration=_TURN_GAP_MS)

    for line_dict in conversation:
        for speaker, line in line_dict.items():
            if not isinstance(line, str) or not line.strip():
                continue
            voice = _synthesize_speech(line)
            if speaker_voice_map.get(speaker, "female") == "male":
                voice = _lower_pitch(voice, _MALE_PITCH_FACTOR)
            final_audio += voice + turn_gap

    output_bytes = BytesIO()
    final_audio.export(output_bytes, format="mp3")
    output_bytes.seek(0)
    return output_bytes
async def convert_pdf_to_audio(file: UploadFile = File(...)):
    """Convert an uploaded PDF into a two-voice MP3 conversation.

    Args:
        file: The uploaded PDF document.

    Returns:
        A ``StreamingResponse`` with media type ``audio/mpeg`` that carries
        the rendered MP3 as an attachment named after the upload.

    Raises:
        HTTPException: 400 when no text could be extracted from the PDF;
            500 (with the error text as detail) for any other failure.

    NOTE(review): no route registration for this handler is visible in this
    part of the file -- confirm how it is exposed.
    """
    try:
        file_bytes = BytesIO(await file.read())

        # PDF parsing, the model call and speech synthesis are all blocking;
        # run them on worker threads so the event loop stays responsive.
        text = await run_in_threadpool(extract_text_from_pdf, file_bytes)
        if not text.strip():
            raise HTTPException(status_code=400, detail="No text extracted from PDF")
        conversation = await run_in_threadpool(generate_conversation, text)
        audio_stream = await run_in_threadpool(create_audio_stream, conversation)

        # The client-supplied name may be absent and may contain characters
        # that are unsafe in an unquoted header parameter; keep the part
        # before the first dot and replace anything outside a safe set.
        stem = re.sub(r"[^A-Za-z0-9_-]", "_", (file.filename or "").split('.')[0])
        stem = stem or "output"

        return StreamingResponse(
            audio_stream,
            media_type="audio/mpeg",
            headers={"Content-Disposition": f"attachment; filename=audio_{stem}.mp3"}
        )
    except HTTPException:
        # Deliberate HTTP errors (such as the 400 above) must keep their status.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
def health_check():
    """Return a static payload indicating that the service is up.

    NOTE(review): no route registration for this handler is visible in this
    part of the file -- confirm how it is exposed.
    """
    return dict(status="healthy")
# Script entry point: serve the application with uvicorn when run directly.
if __name__ == "__main__":
    import uvicorn

    # The app is passed as the import string "main:app", so this module is
    # presumably saved as main.py -- verify against the deployment.
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=7860,
        reload=True,
    )