Spaces:
Running
on
T4
Running
on
T4
add documentation back in
Browse files - src/generate.py +13 -4
src/generate.py
CHANGED
|
@@ -8,8 +8,9 @@ This module connects to an external language model (in this case, the public
|
|
| 8 |
Hugging Face Space for Llama 3.2 3B Instruct) to generate natural-sounding
|
| 9 |
sentences that users can read aloud to give informed consent for voice cloning.
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
|
|
|
| 13 |
"""
|
| 14 |
|
| 15 |
import os
|
|
@@ -54,6 +55,8 @@ def _extract_llama_text(result: Any) -> str:
|
|
| 54 |
if isinstance(result, (int, float, bool)):
|
| 55 |
return str(result)
|
| 56 |
if isinstance(result, list):
|
|
|
|
|
|
|
| 57 |
parts = []
|
| 58 |
for x in result:
|
| 59 |
s = _extract_llama_text(x)
|
|
@@ -61,6 +64,7 @@ def _extract_llama_text(result: Any) -> str:
|
|
| 61 |
parts.append(s)
|
| 62 |
return " ".join(parts).strip()
|
| 63 |
if isinstance(result, dict):
|
|
|
|
| 64 |
for key in ("text", "response", "content", "generated_text", "message"):
|
| 65 |
v = result.get(key)
|
| 66 |
if isinstance(v, str) and v.strip():
|
|
@@ -70,8 +74,7 @@ def _extract_llama_text(result: Any) -> str:
|
|
| 70 |
|
| 71 |
def gen_sentence(audio_model_name="Chatterbox"):
|
| 72 |
"""
|
| 73 |
-
Always generate a sentence via the LLM.
|
| 74 |
-
but it's ignored to keep the callback signature stable.
|
| 75 |
"""
|
| 76 |
try:
|
| 77 |
return gen_sentence_llm(audio_model_name=audio_model_name)
|
|
@@ -110,7 +113,10 @@ def gen_sentence_llm(
|
|
| 110 |
prompt = get_consent_generation_prompt(audio_model_name)
|
| 111 |
|
| 112 |
try:
|
|
|
|
| 113 |
client = Client(LLAMA_SPACE_ID, hf_token=HF_TOKEN)
|
|
|
|
|
|
|
| 114 |
result = client.predict(
|
| 115 |
message=prompt,
|
| 116 |
max_new_tokens=128,
|
|
@@ -121,12 +127,15 @@ def gen_sentence_llm(
|
|
| 121 |
api_name=LLAMA_API_NAME,
|
| 122 |
)
|
| 123 |
|
|
|
|
| 124 |
text = _extract_llama_text(result)
|
| 125 |
text = process.normalize_text(text, lower=False)
|
| 126 |
|
|
|
|
| 127 |
if not text:
|
| 128 |
raise ValueError("Empty response from Llama Space")
|
| 129 |
|
|
|
|
| 130 |
first_line = next((ln.strip() for ln in text.splitlines() if ln.strip()), "")
|
| 131 |
return first_line or text
|
| 132 |
|
|
|
|
| 8 |
Hugging Face Space for Llama 3.2 3B Instruct) to generate natural-sounding
|
| 9 |
sentences that users can read aloud to give informed consent for voice cloning.
|
| 10 |
|
| 11 |
+
Functions:
|
| 12 |
+
- _extract_llama_text(): Normalize the API output from the Llama demo.
|
| 13 |
+
- gen_sentence_llm(): Generate a consent sentence from the Llama model Space.
|
| 14 |
"""
|
| 15 |
|
| 16 |
import os
|
|
|
|
| 55 |
if isinstance(result, (int, float, bool)):
|
| 56 |
return str(result)
|
| 57 |
if isinstance(result, list):
|
| 58 |
+
# If multiple segments are returned (e.g., multiple sentences),
|
| 59 |
+
# join them into one string.
|
| 60 |
parts = []
|
| 61 |
for x in result:
|
| 62 |
s = _extract_llama_text(x)
|
|
|
|
| 64 |
parts.append(s)
|
| 65 |
return " ".join(parts).strip()
|
| 66 |
if isinstance(result, dict):
|
| 67 |
+
# Common key names used in Gradio JSON responses
|
| 68 |
for key in ("text", "response", "content", "generated_text", "message"):
|
| 69 |
v = result.get(key)
|
| 70 |
if isinstance(v, str) and v.strip():
|
|
|
|
| 74 |
|
| 75 |
def gen_sentence(audio_model_name="Chatterbox"):
|
| 76 |
"""
|
| 77 |
+
Always generate a sentence via the LLM.
|
|
|
|
| 78 |
"""
|
| 79 |
try:
|
| 80 |
return gen_sentence_llm(audio_model_name=audio_model_name)
|
|
|
|
| 113 |
prompt = get_consent_generation_prompt(audio_model_name)
|
| 114 |
|
| 115 |
try:
|
| 116 |
+
# Initialize Gradio client for the Llama demo Space
|
| 117 |
client = Client(LLAMA_SPACE_ID, hf_token=HF_TOKEN)
|
| 118 |
+
|
| 119 |
+
# The Llama demo exposes a simple /chat endpoint with standard decoding params
|
| 120 |
result = client.predict(
|
| 121 |
message=prompt,
|
| 122 |
max_new_tokens=128,
|
|
|
|
| 127 |
api_name=LLAMA_API_NAME,
|
| 128 |
)
|
| 129 |
|
| 130 |
+
# Normalize and clean up model output
|
| 131 |
text = _extract_llama_text(result)
|
| 132 |
text = process.normalize_text(text, lower=False)
|
| 133 |
|
| 134 |
+
# Handle empty or malformed outputs
|
| 135 |
if not text:
|
| 136 |
raise ValueError("Empty response from Llama Space")
|
| 137 |
|
| 138 |
+
# In case the model produces multiple lines or options, keep only the first non-empty line
|
| 139 |
first_line = next((ln.strip() for ln in text.splitlines() if ln.strip()), "")
|
| 140 |
return first_line or text
|
| 141 |
|