Nick Starkov committed on
Commit
20c79ed
·
1 Parent(s): 302c8fe
Files changed (1) hide show
  1. app.py +16 -3
app.py CHANGED
@@ -2,6 +2,8 @@ import os
2
  from fastapi import FastAPI
3
  from datasets import load_dataset
4
  import random
 
 
5
 
6
  # Set Hugging Face cache directory programmatically as a fallback
7
  os.environ["HF_HOME"] = "/app/cache"
@@ -13,14 +15,25 @@ dataset = load_dataset("UCSC-Admire/idiom-SFT-dataset-561-2024-12-06_00-40-30",
13
 
14
  @app.get("/api/idioms")
15
  async def get_idioms():
16
- # Select 50 random idioms from the dataset
17
  idioms = random.sample(list(dataset), 50)
18
- # Extract required fields
19
  response = []
 
20
  for item in idioms:
 
 
 
 
 
 
 
 
 
 
 
21
  response.append({
22
  "idiom": item.get("compound", ""),
23
  "example": item.get("sentence", ""),
24
- "definition": item.get("output", ""),
25
  })
 
26
  return response
 
2
  from fastapi import FastAPI
3
  from datasets import load_dataset
4
  import random
5
+ import json
6
+ import re
7
 
8
  # Set Hugging Face cache directory programmatically as a fallback
9
  os.environ["HF_HOME"] = "/app/cache"
 
15
 
16
  @app.get("/api/idioms")
17
  async def get_idioms():
 
18
  idioms = random.sample(list(dataset), 50)
 
19
  response = []
20
+
21
  for item in idioms:
22
+ raw_output = item.get("output", "")
23
+ # Strip <output> tags using regex
24
+ cleaned_output = re.sub(r"</?output>", "", raw_output).strip()
25
+
26
+ # Try parsing the JSON to get 'Compound Meaning'
27
+ try:
28
+ output_json = json.loads(cleaned_output)
29
+ compound_meaning = output_json.get("Compound Meaning", "")
30
+ except json.JSONDecodeError:
31
+ compound_meaning = "" # Fallback if JSON is invalid
32
+
33
  response.append({
34
  "idiom": item.get("compound", ""),
35
  "example": item.get("sentence", ""),
36
+ "definition": compound_meaning,
37
  })
38
+
39
  return response