File size: 5,575 Bytes
794256c
 
 
 
 
 
 
 
 
 
9c20baa
 
9c712b0
9c20baa
794256c
 
 
 
 
a8401a3
 
14f5917
a8401a3
 
794256c
1a5dce9
 
794256c
 
 
 
 
 
 
 
14f5917
794256c
1a5dce9
 
 
 
 
 
 
9c712b0
1a5dce9
9c712b0
14f5917
794256c
 
 
 
 
9c20baa
 
794256c
 
 
 
 
 
 
9c20baa
794256c
 
 
 
 
 
 
e8d021a
bc5d859
9c712b0
 
 
 
 
 
bc5d859
 
e8d021a
bc5d859
 
 
 
1a5dce9
e8d021a
a8401a3
e8d021a
 
 
 
 
 
 
 
9c712b0
 
 
 
 
 
 
e8d021a
9c712b0
 
 
e8d021a
1a5dce9
e8d021a
9c712b0
 
794256c
 
9c712b0
 
 
 
 
9c20baa
 
794256c
 
 
 
 
 
 
9c712b0
794256c
 
9c20baa
794256c
 
 
9c20baa
794256c
 
 
9c20baa
794256c
 
 
 
 
bc5d859
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# src/generate.py
"""
Module: generate
----------------
Handles the generation of "consent sentences" for the Voice Consent Gate demo.

This module connects to an external language model (in this case, the public
Hugging Face Space for Llama 3.2 3B Instruct) to generate natural-sounding
sentences that users can read aloud to give informed consent for voice cloning.

Functions:
    - _extract_llama_text(): Normalize the API output from the Llama demo.
    - gen_sentence(): Wrapper for gen_sentence_llm(); previously supported other options.
    - gen_sentence_llm(): Generate a consent sentence from the Llama model Space.
"""

import os
from typing import Any
from gradio_client import Client

import src.process as process
from src.prompts import get_consent_generation_prompt


# ------------------- Model / Space Configuration -------------------
# The demo connects to the Llama 3.2 3B Instruct Space on Hugging Face.
# You can override these defaults by setting environment variables in your Space.
LLAMA_SPACE_ID: str = os.getenv(
    "LLAMA_SPACE_ID", "huggingface-projects/llama-3.2-3B-Instruct"
)
LLAMA_API_NAME: str = "/chat"  # The Space exposes a single /chat endpoint.
HF_TOKEN = os.getenv("HF_TOKEN")  # Optional; not required for public Spaces.


def _extract_llama_text(result: Any) -> str:
    """
    Normalize the API response from the Llama 3.2 3B demo Space into plain text.

    The Space’s `/chat` endpoint may return different shapes depending on how
    the Gradio app is structured — sometimes a string, other times a dictionary
    or list. This function recursively traverses and extracts the first
    meaningful text string it finds.

    Parameters
        result : The raw output returned by `client.predict()`.

        str : Cleaned text output (may be empty string if extraction fails).
    """
    if isinstance(result, str):
        return result.strip()
    if isinstance(result, (int, float, bool)):
        return str(result)
    if isinstance(result, list):
        # If multiple segments are returned (e.g., multiple sentences),
        # join them into one string.
        parts = []
        for x in result:
            s = _extract_llama_text(x)
            if s:
                parts.append(s)
        return " ".join(parts).strip()
    if isinstance(result, dict):
        # Common key names used in Gradio JSON responses
        for key in ("text", "response", "content", "generated_text", "message"):
            v = result.get(key)
            if isinstance(v, str) and v.strip():
                return v.strip()
    return ""


def gen_sentence(consent_method="Llama 3.2 3B Instruct", voice_clone_model="Chatterbox"):
    """
    Generate a consent sentence, always delegating to the LLM backend.

    Any failure of the LLM call is caught and rendered as an error string,
    so the UI's Target sentence box shows a helpful message instead of
    the app crashing.

    Parameters
        consent_method: str
            The language model used to generate a consent sentence
        voice_clone_model: str
            The voice cloning model
    """
    try:
        sentence = gen_sentence_llm(consent_method, voice_clone_model)
    except Exception as err:
        # Surface the failure directly in the Target sentence box.
        sentence = f"[ERROR calling LLM] {type(err).__name__}: {err}"
    return sentence

# TODO: Support more than just Llama 3.2 3B Instruct
def gen_sentence_llm(consent_method="Llama 3.2 3B Instruct", voice_clone_model="Chatterbox") -> str:
    """
    Generate a consent sentence using the Llama 3.2 3B Instruct demo Space.

    This function constructs a prompt describing the linguistic and ethical
    requirements for a consent sentence (via `get_consent_generation_prompt`)
    and sends it to the Llama demo hosted on Hugging Face Spaces.

    The response is normalized into a single English sentence suitable
    for reading aloud.

    Parameters
        consent_method : str
            The name of the language model used to generate the consent utterance.
            Currently just implemented for Llama 3.2 3B Instruct.
        voice_clone_model : str
            The name of the voice-cloning model to mention in the sentence.
            Defaults to "Chatterbox".

    Returns
        str
            A clean, human-readable consent sentence.

    Raises
        Exception
            Any failure from the Space call (or an empty response, raised as
            ValueError) is logged and re-raised for the caller to handle.
    """
    # Generate the full natural-language prompt that the LLM will receive
    prompt = get_consent_generation_prompt(voice_clone_model)
    space_id = LLAMA_SPACE_ID
    api_name = LLAMA_API_NAME

    try:
        # Currently always true; any other requested model falls back to Llama.
        if consent_method != "Llama 3.2 3B Instruct":
            print(f"Not currently implemented for {consent_method}; using Llama 3.2 3B Instruct")
        # Initialize Gradio client for the language model Space
        client = Client(space_id, hf_token=HF_TOKEN)

        # The Llama demo exposes a simple /chat endpoint with standard decoding params
        result = client.predict(
            message=prompt,
            max_new_tokens=128,
            temperature=0.6,
            top_p=0.9,
            top_k=50,
            repetition_penalty=1.2,
            api_name=api_name,
        )

        # Normalize and clean up model output
        text = _extract_llama_text(result)
        text = process.normalize_text(text, lower=False)

        # Handle empty or malformed outputs
        if not text:
            raise ValueError("Empty response from Llama Space")

        # In case the model produces multiple lines or options, pick the first full sentence
        first_line = next((ln.strip() for ln in text.splitlines() if ln.strip()), "")
        return first_line or text

    except Exception as e:
        print(f"[gen_sentence_llm] Llama Space call failed: {type(e).__name__}: {e}")
        raise