File size: 6,594 Bytes
a4b70d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
from __future__ import annotations

from urllib.parse import quote
from aiohttp import ClientSession

from ...typing import AsyncResult, Messages
from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin
from ..helper import format_media_prompt, get_system_prompt
from ...image.copy_images import save_response_media
from ...providers.response import AudioResponse
from ...requests.raise_for_status import raise_for_status
from ...requests.aiohttp import get_connector
from ...requests import DEFAULT_HEADERS

class OpenAIFM(AsyncGeneratorProvider, ProviderModelMixin):
    """Provider that requests generated speech from the OpenAI.fm endpoint.

    A model name is either one of ``voices`` (sent as the ``voice`` query
    parameter) or one of ``styles``. A style selects the instruction preset
    stored on this class under the same name; the voice is then resolved by
    ``get_model("")`` (presumably the default voice - confirm against
    ``ProviderModelMixin``).
    """
    label = "OpenAI.fm"
    url = "https://www.openai.fm"
    api_endpoint = "https://www.openai.fm/api/generate"
    working = True

    default_model = 'coral'
    voices = ['alloy', 'ash', 'ballad', 'coral', 'echo', 'fable', 'onyx', 'nova', 'sage', 'shimmer', 'verse']
    styles = ['friendly', 'patient_teacher', 'noir_detective', 'cowboy', 'calm', 'scientific_style']
    audio_models = {"gpt-4o-mini-tts": voices}
    model_aliases = {"gpt-4o-mini-tts": default_model}
    models = styles + voices

    @classmethod
    def get_grouped_models(cls) -> list[dict]:
        """Return the model names split into a "Styles" and a "Voices" group."""
        return [
            {"group":"Styles", "models": cls.styles},
            {"group":"Voices", "models": cls.voices},
        ]

    # Instruction presets, one class attribute per entry in ``styles``.
    # ``create_async_generator`` looks them up by name with ``getattr``.
    friendly = """Affect/personality: A cheerful guide 

Tone: Friendly, clear, and reassuring, creating a calm atmosphere and making the listener feel confident and comfortable.

Pronunciation: Clear, articulate, and steady, ensuring each instruction is easily understood while maintaining a natural, conversational flow.

Pause: Brief, purposeful pauses after key instructions (e.g., "cross the street" and "turn right") to allow time for the listener to process the information and follow along.

Emotion: Warm and supportive, conveying empathy and care, ensuring the listener feels guided and safe throughout the journey."""

    patient_teacher = """Accent/Affect: Warm, refined, and gently instructive, reminiscent of a friendly art instructor.

Tone: Calm, encouraging, and articulate, clearly describing each step with patience.

Pacing: Slow and deliberate, pausing often to allow the listener to follow instructions comfortably.

Emotion: Cheerful, supportive, and pleasantly enthusiastic; convey genuine enjoyment and appreciation of art.

Pronunciation: Clearly articulate artistic terminology (e.g., "brushstrokes," "landscape," "palette") with gentle emphasis.

Personality Affect: Friendly and approachable with a hint of sophistication; speak confidently and reassuringly, guiding users through each painting step patiently and warmly."""

    noir_detective = """Affect: a mysterious noir detective

Tone: Cool, detached, but subtly reassuring—like they've seen it all and know how to handle a missing package like it's just another case.

Delivery: Slow and deliberate, with dramatic pauses to build suspense, as if every detail matters in this investigation.

Emotion: A mix of world-weariness and quiet determination, with just a hint of dry humor to keep things from getting too grim.

Punctuation: Short, punchy sentences with ellipses and dashes to create rhythm and tension, mimicking the inner monologue of a detective piecing together clues."""

    cowboy = """Voice: Warm, relaxed, and friendly, with a steady cowboy drawl that feels approachable.

Punctuation: Light and natural, with gentle pauses that create a conversational rhythm without feeling rushed.

Delivery: Smooth and easygoing, with a laid-back pace that reassures the listener while keeping things clear.

Phrasing: Simple, direct, and folksy, using casual, familiar language to make technical support feel more personable.

Tone: Lighthearted and welcoming, with a calm confidence that puts the caller at ease."""

    calm = """Voice Affect: Calm, composed, and reassuring; project quiet authority and confidence.

Tone: Sincere, empathetic, and gently authoritative—express genuine apology while conveying competence.

Pacing: Steady and moderate; unhurried enough to communicate care, yet efficient enough to demonstrate professionalism.

Emotion: Genuine empathy and understanding; speak with warmth, especially during apologies ("I'm very sorry for any disruption...").

Pronunciation: Clear and precise, emphasizing key reassurances ("smoothly," "quickly," "promptly") to reinforce confidence.

Pauses: Brief pauses after offering assistance or requesting details, highlighting willingness to listen and support."""

    scientific_style = """Voice: Authoritative and precise, with a measured, academic tone.

Tone: Formal and analytical, maintaining objectivity while conveying complex information.

Pacing: Moderate and deliberate, allowing time for complex concepts to be processed.

Pronunciation: Precise articulation of technical terms and scientific vocabulary.

Pauses: Strategic pauses after introducing new concepts to allow for comprehension.

Emotion: Restrained enthusiasm for discoveries and findings, conveying intellectual curiosity."""

    @classmethod
    async def create_async_generator(
        cls,
        model: str,
        messages: Messages,
        proxy: str = None,
        prompt: str = None,
        audio: dict = None,
        download_media: bool = True,
        **kwargs
    ) -> AsyncResult:
        """Request speech for the prompt and yield the resulting audio.

        Args:
            model: A voice name, a style name, or an alias handled by
                ``get_model``. A style name selects the matching instruction
                preset instead of a voice.
            messages: Conversation used to build the spoken text (via
                ``format_media_prompt``) and the default instructions (via
                ``get_system_prompt``).
            proxy: Proxy passed to ``get_connector`` for the HTTP session.
            prompt: Optional explicit text, forwarded to
                ``format_media_prompt``.
            audio: Optional overrides; the keys ``"voice"`` and
                ``"instructions"`` take precedence over the same-named
                keyword arguments.
            download_media: When false, no request is made and an
                ``AudioResponse`` wrapping the ready-to-fetch URL is yielded.
            **kwargs: ``voice`` and ``instructions`` are read as fallbacks.

        Yields:
            An ``AudioResponse`` holding the request URL, or whatever
            ``save_response_media`` produces for the downloaded response.
        """
        # ``None`` replaces a shared mutable ``{}`` default; an explicitly
        # passed ``None`` is accepted as "no overrides" as well.
        audio = audio or {}
        default_instructions = get_system_prompt(messages)
        # Only declared styles select a preset. A bare ``hasattr`` check would
        # also match unrelated attributes such as ``url`` or ``get_model`` and
        # send their value as the instructions.
        if model in cls.styles:
            default_instructions = getattr(cls, model)
            model = ""
        model = cls.get_model(model)
        voice = audio.get("voice", kwargs.get("voice", model))
        instructions = audio.get("instructions", kwargs.get("instructions", default_instructions))
        headers = {
            **DEFAULT_HEADERS,
            "referer": f"{cls.url}/"
        }
        prompt = format_media_prompt(messages, prompt)
        # Drop unset values once so both code paths send the same query;
        # ``None`` is not a valid query value for the HTTP client.
        params = {
            key: value
            for key, value in {
                "input": prompt,
                "prompt": instructions,
                "voice": voice
            }.items()
            if value is not None
        }
        if not download_media:
            query = "&".join(f"{k}={quote(str(v))}" for k, v in params.items())
            yield AudioResponse(f"{cls.api_endpoint}?{query}")
            return
        async with ClientSession(headers=headers, connector=get_connector(proxy=proxy)) as session:
            async with session.get(
                cls.api_endpoint,
                params=params
            ) as response:
                await raise_for_status(response)
                async for chunk in save_response_media(response, prompt, [model, voice]):
                    yield chunk