Spaces:
Sleeping
Sleeping
File size: 1,417 Bytes
f232d53 7ec4963 d7ab29f 67a207f d7ab29f 67a207f f232d53 7ec4963 67a207f d7ab29f 770e3a9 d7ab29f 67a207f 7ec4963 d7ab29f 67a207f 20c79ed 67a207f d7ab29f 67a207f d7ab29f 67a207f 20c79ed 39cbc9d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import json
import os
import random
import re

from fastapi import FastAPI

# Hugging Face libraries resolve their cache locations from the environment
# when they are first imported, so the fallback cache directory has to be in
# place BEFORE `datasets` is imported. setdefault keeps any HF_HOME that the
# deployment environment already provides, making this a true fallback.
os.environ.setdefault("HF_HOME", "/app/cache")

from datasets import load_dataset  # noqa: E402  (must follow the HF_HOME setup)

app = FastAPI()

# Load the train split from the Hugging Face Hub and keep only the columns the
# API needs; sentence_type is kept because the filter below reads it.
dataset = load_dataset(
    "UCSC-Admire/idiom-SFT-dataset-561-2024-12-06_00-40-30", split="train"
).select_columns(["compound", "sentence", "output", "sentence_type"])

# Keep only the records whose sentence_type is "idiomatic".
dataset = dataset.filter(lambda x: x["sentence_type"] == "idiomatic")
@app.get("/api/idioms")
async def get_idioms():
    """Return up to 50 randomly chosen idioms with an example and a definition.

    Returns:
        A list of dicts, one per sampled record, with the keys:
        ``idiom`` (the record's ``compound``), ``example`` (its ``sentence``)
        and ``definition`` (the ``Compound Meaning`` entry of the JSON stored
        in its ``output`` column, or ``""`` when it cannot be extracted).
    """
    # Sample row indices rather than materializing every row on each request.
    # Capping the sample size avoids the ValueError random.sample raises when
    # the population is smaller than the requested sample.
    total = len(dataset)
    picked = random.sample(range(total), min(50, total))

    response = []
    for index in picked:
        item = dataset[index]

        # `or ""` also covers a record whose output value is None.
        raw_output = item.get("output") or ""
        # Strip the <output> / </output> wrapper tags around the JSON payload.
        cleaned_output = re.sub(r"</?output>", "", raw_output).strip()

        try:
            output_json = json.loads(cleaned_output)
        except json.JSONDecodeError:
            output_json = None  # Fallback if JSON is invalid

        # Valid JSON that is not an object (list, string, number, ...) has no
        # "Compound Meaning" key to read, so it falls back to "" as well.
        if isinstance(output_json, dict):
            compound_meaning = output_json.get("Compound Meaning", "")
        else:
            compound_meaning = ""

        response.append({
            "idiom": item.get("compound", ""),
            "example": item.get("sentence", ""),
            "definition": compound_meaning,
        })
    return response