File size: 6,594 Bytes
a4b70d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
from __future__ import annotations
from urllib.parse import quote
from aiohttp import ClientSession
from ...typing import AsyncResult, Messages
from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin
from ..helper import format_media_prompt, get_system_prompt
from ...image.copy_images import save_response_media
from ...providers.response import AudioResponse
from ...requests.raise_for_status import raise_for_status
from ...requests.aiohttp import get_connector
from ...requests import DEFAULT_HEADERS
class OpenAIFM(AsyncGeneratorProvider, ProviderModelMixin):
    """Text-to-speech provider for the OpenAI.fm generate endpoint.

    The ``model`` passed to :meth:`create_async_generator` may be either a
    voice name (one of ``voices``) or a style name (one of ``styles``).
    A style name selects the instruction preset stored in the class
    attribute of the same name; the voice is then resolved by calling
    ``get_model`` with an empty model name (presumably yielding
    ``default_model`` -- confirm against ``ProviderModelMixin``).
    """

    label = "OpenAI.fm"
    url = "https://www.openai.fm"
    api_endpoint = "https://www.openai.fm/api/generate"
    working = True

    default_model = 'coral'
    # Voice names sent as the "voice" query parameter.
    voices = ['alloy', 'ash', 'ballad', 'coral', 'echo', 'fable', 'onyx', 'nova', 'sage', 'shimmer', 'verse']
    # Style names; each one must have a same-named preset string defined below.
    styles = ['friendly', 'patient_teacher', 'noir_detective', 'cowboy', 'calm', 'scientific_style']
    audio_models = {"gpt-4o-mini-tts": voices}
    model_aliases = {"gpt-4o-mini-tts": default_model}
    models = styles + voices

    @classmethod
    def get_grouped_models(cls) -> list[dict]:
        """Return the selectable models grouped into styles and voices."""
        return [
            {"group":"Styles", "models": cls.styles},
            {"group":"Voices", "models": cls.voices},
        ]

    # --- Style presets (used as the "prompt" query parameter) -------------
    friendly = """Affect/personality: A cheerful guide
Tone: Friendly, clear, and reassuring, creating a calm atmosphere and making the listener feel confident and comfortable.
Pronunciation: Clear, articulate, and steady, ensuring each instruction is easily understood while maintaining a natural, conversational flow.
Pause: Brief, purposeful pauses after key instructions (e.g., "cross the street" and "turn right") to allow time for the listener to process the information and follow along.
Emotion: Warm and supportive, conveying empathy and care, ensuring the listener feels guided and safe throughout the journey."""

    patient_teacher = """Accent/Affect: Warm, refined, and gently instructive, reminiscent of a friendly art instructor.
Tone: Calm, encouraging, and articulate, clearly describing each step with patience.
Pacing: Slow and deliberate, pausing often to allow the listener to follow instructions comfortably.
Emotion: Cheerful, supportive, and pleasantly enthusiastic; convey genuine enjoyment and appreciation of art.
Pronunciation: Clearly articulate artistic terminology (e.g., "brushstrokes," "landscape," "palette") with gentle emphasis.
Personality Affect: Friendly and approachable with a hint of sophistication; speak confidently and reassuringly, guiding users through each painting step patiently and warmly."""

    noir_detective = """Affect: a mysterious noir detective
Tone: Cool, detached, but subtly reassuring—like they've seen it all and know how to handle a missing package like it's just another case.
Delivery: Slow and deliberate, with dramatic pauses to build suspense, as if every detail matters in this investigation.
Emotion: A mix of world-weariness and quiet determination, with just a hint of dry humor to keep things from getting too grim.
Punctuation: Short, punchy sentences with ellipses and dashes to create rhythm and tension, mimicking the inner monologue of a detective piecing together clues."""

    cowboy = """Voice: Warm, relaxed, and friendly, with a steady cowboy drawl that feels approachable.
Punctuation: Light and natural, with gentle pauses that create a conversational rhythm without feeling rushed.
Delivery: Smooth and easygoing, with a laid-back pace that reassures the listener while keeping things clear.
Phrasing: Simple, direct, and folksy, using casual, familiar language to make technical support feel more personable.
Tone: Lighthearted and welcoming, with a calm confidence that puts the caller at ease."""

    calm = """Voice Affect: Calm, composed, and reassuring; project quiet authority and confidence.
Tone: Sincere, empathetic, and gently authoritative—express genuine apology while conveying competence.
Pacing: Steady and moderate; unhurried enough to communicate care, yet efficient enough to demonstrate professionalism.
Emotion: Genuine empathy and understanding; speak with warmth, especially during apologies ("I'm very sorry for any disruption...").
Pronunciation: Clear and precise, emphasizing key reassurances ("smoothly," "quickly," "promptly") to reinforce confidence.
Pauses: Brief pauses after offering assistance or requesting details, highlighting willingness to listen and support."""

    scientific_style = """Voice: Authoritative and precise, with a measured, academic tone.
Tone: Formal and analytical, maintaining objectivity while conveying complex information.
Pacing: Moderate and deliberate, allowing time for complex concepts to be processed.
Pronunciation: Precise articulation of technical terms and scientific vocabulary.
Pauses: Strategic pauses after introducing new concepts to allow for comprehension.
Emotion: Restrained enthusiasm for discoveries and findings, conveying intellectual curiosity."""

    @classmethod
    async def create_async_generator(
        cls,
        model: str,
        messages: Messages,
        proxy: str = None,
        prompt: str = None,
        audio: dict = None,
        download_media: bool = True,
        **kwargs
    ) -> AsyncResult:
        """Generate speech for the conversation prompt.

        Args:
            model: A voice name, a style name, or an alias understood by
                ``get_model``. A style name selects the matching preset as
                the default instructions.
            messages: Conversation; the system prompt supplies the default
                instructions and the text to speak is derived from it via
                ``format_media_prompt``.
            proxy: Optional proxy handed to ``get_connector``.
            prompt: Optional explicit text, forwarded to
                ``format_media_prompt``.
            audio: Optional overrides; the keys ``"voice"`` and
                ``"instructions"`` are read. ``None`` is treated as empty.
            download_media: When false, only the request URL is yielded and
                no HTTP request is made.
            **kwargs: ``voice`` / ``instructions`` are used as fallbacks when
                not present in ``audio``.

        Yields:
            An ``AudioResponse`` wrapping the request URL when
            ``download_media`` is false; otherwise every chunk produced by
            ``save_response_media`` for the HTTP response.
        """
        # Avoid a shared mutable default and tolerate an explicit ``audio=None``.
        if audio is None:
            audio = {}

        default_instructions = get_system_prompt(messages)
        # Only real style names select a preset. A plain ``hasattr`` check
        # would also match unrelated class attributes such as "url" or
        # "voices" and send them as instructions.
        if model in cls.styles:
            default_instructions = getattr(cls, model)
            model = ""
        model = cls.get_model(model)

        voice = audio.get("voice", kwargs.get("voice", model))
        instructions = audio.get("instructions", kwargs.get("instructions", default_instructions))
        headers = {
            **DEFAULT_HEADERS,
            "referer": f"{cls.url}/"
        }
        prompt = format_media_prompt(messages, prompt)

        # Drop unset values once so the URL-only path and the HTTP request
        # below send exactly the same query.
        candidate_params = {
            "input": prompt,
            "prompt": instructions,
            "voice": voice
        }
        params = {key: value for key, value in candidate_params.items() if value is not None}

        if not download_media:
            query = "&".join(f"{key}={quote(str(value))}" for key, value in params.items())
            yield AudioResponse(f"{cls.api_endpoint}?{query}")
            return

        async with ClientSession(headers=headers, connector=get_connector(proxy=proxy)) as session:
            async with session.get(
                cls.api_endpoint,
                params=params
            ) as response:
                await raise_for_status(response)
                async for chunk in save_response_media(response, prompt, [model, voice]):
                    yield chunk
|