# src/generate.py
"""
Module: generate
----------------
Handles the generation of "consent sentences" for the Voice Consent Gate demo.
This module connects to an external language model (in this case, the public
Hugging Face Space for Llama 3.2 3B Instruct) to generate natural-sounding
sentences that users can read aloud to give informed consent for voice cloning.
Functions:
- _extract_llama_text(): Normalize the API output from the Llama demo.
- gen_sentence(): Wrapper for gen_sentence_llm(); previously supported other options.
- gen_sentence_llm(): Generate a consent sentence from the Llama model Space.
"""
import os
from typing import Any

from gradio_client import Client

import src.process as process
from src.prompts import get_consent_generation_prompt

# ------------------- Model / Space Configuration -------------------
# The demo connects to the Llama 3.2 3B Instruct Space on Hugging Face.
# You can override these defaults by setting environment variables in your Space.
LLAMA_SPACE_ID = os.getenv(
    "LLAMA_SPACE_ID", "huggingface-projects/llama-3.2-3B-Instruct"
)
LLAMA_API_NAME = "/chat"  # The Space exposes a single /chat endpoint.
HF_TOKEN = os.getenv("HF_TOKEN")  # Optional; not required for public Spaces.
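# Example override (hypothetical values, set as Space secrets/variables):
#   LLAMA_SPACE_ID="my-org/llama-3.2-3B-Instruct-mirror"
#   HF_TOKEN="hf_..."   # only needed if the target Space is private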
def _extract_llama_text(result: Any) -> str:
    """
    Normalize the API response from the Llama 3.2 3B demo Space into plain text.

    The Space's `/chat` endpoint may return different shapes depending on how
    the Gradio app is structured: sometimes a string, other times a dictionary
    or list. This function recursively traverses the result and extracts the
    first meaningful text string it finds.

    Parameters
    ----------
    result : Any
        The raw output returned by `client.predict()`.

    Returns
    -------
    str
        Cleaned text output (may be an empty string if extraction fails).
    """
    if isinstance(result, str):
        return result.strip()
    if isinstance(result, (int, float, bool)):
        return str(result)
    if isinstance(result, list):
        # If multiple segments are returned (e.g., multiple sentences),
        # join them into one string.
        parts = []
        for x in result:
            s = _extract_llama_text(x)
            if s:
                parts.append(s)
        return " ".join(parts).strip()
    if isinstance(result, dict):
        # Common key names used in Gradio JSON responses
        for key in ("text", "response", "content", "generated_text", "message"):
            v = result.get(key)
            if isinstance(v, str) and v.strip():
                return v.strip()
    return ""
def gen_sentence(consent_method="Llama 3.2 3B Instruct", voice_clone_model="Chatterbox"):
    """
    Always generates a sentence via the LLM.

    Parameters
    ----------
    consent_method : str
        The language model used to generate a consent sentence.
    voice_clone_model : str
        The voice cloning model to mention in the generated sentence.
    """
    try:
        return gen_sentence_llm(consent_method, voice_clone_model)
    except Exception as e:
        # Show a helpful message directly in the Target sentence box
        return f"[ERROR calling LLM] {type(e).__name__}: {e}"
# TODO: Support more than just Llama 3.2 3B Instruct
def gen_sentence_llm(consent_method="Llama 3.2 3B Instruct", voice_clone_model="Chatterbox") -> str:
    """
    Generate a consent sentence using the Llama 3.2 3B Instruct demo Space.

    This function constructs a prompt describing the linguistic and ethical
    requirements for a consent sentence (via `get_consent_generation_prompt`)
    and sends it to the Llama demo hosted on Hugging Face Spaces.
    The response is normalized into a single English sentence suitable
    for reading aloud.

    Parameters
    ----------
    consent_method : str
        The name of the language model used to generate the consent utterance.
        Currently only implemented for Llama 3.2 3B Instruct.
    voice_clone_model : str
        The name of the voice-cloning model to mention in the sentence.
        Defaults to "Chatterbox".

    Returns
    -------
    str
        A clean, human-readable consent sentence.
    """
    # Generate the full natural-language prompt that the LLM will receive
    prompt = get_consent_generation_prompt(voice_clone_model)
    space_id = LLAMA_SPACE_ID
    api_name = LLAMA_API_NAME
    try:
        # consent_method is currently always "Llama 3.2 3B Instruct",
        # so this branch only logs a fallback for future options.
        if consent_method != "Llama 3.2 3B Instruct":
            print(
                "Not currently implemented for %s; using Llama 3.2 3B Instruct"
                % consent_method
            )
        # Initialize Gradio client for the language model Space
        client = Client(space_id, hf_token=HF_TOKEN)
        # The Llama demo exposes a simple /chat endpoint with standard decoding params
        result = client.predict(
            message=prompt,
            max_new_tokens=128,
            temperature=0.6,
            top_p=0.9,
            top_k=50,
            repetition_penalty=1.2,
            api_name=api_name,
        )
        # Normalize and clean up model output
        text = _extract_llama_text(result)
        text = process.normalize_text(text, lower=False)
        # Handle empty or malformed outputs
        if not text:
            raise ValueError("Empty response from Llama Space")
        # In case the model produces multiple lines or options, pick the first full sentence
        first_line = next((ln.strip() for ln in text.splitlines() if ln.strip()), "")
        return first_line or text
    except Exception as e:
        print(f"[gen_sentence_llm] Llama Space call failed: {type(e).__name__}: {e}")
        raise
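# Optional manual smoke test. Assumption: run locally with network access to the
# public Llama Space; this block is not part of the demo's import path.
if __name__ == "__main__":
    print(gen_sentence())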