Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,7 +6,7 @@ import numpy as np
|
|
| 6 |
from transformers import pipeline
|
| 7 |
import time
|
| 8 |
|
| 9 |
-
# --- 1. DATA LOADING AND PREPROCESSING ---
|
| 10 |
print("===== Application Startup =====")
|
| 11 |
start_time = time.time()
|
| 12 |
|
|
@@ -20,29 +20,33 @@ def extract_title_and_ingredients(sample):
|
|
| 20 |
|
| 21 |
def extract_each_feature(sample):
|
| 22 |
"""
|
| 23 |
-
FIXED:
|
| 24 |
"""
|
| 25 |
full_text = sample['input']
|
| 26 |
|
|
|
|
| 27 |
title = full_text[:full_text.find("\\n")]
|
| 28 |
|
| 29 |
-
#
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
return {
|
| 48 |
"title": title,
|
|
@@ -70,9 +74,10 @@ index = faiss.IndexFlatL2(dimension)
|
|
| 70 |
index.add(embeddings)
|
| 71 |
print(f"Index is ready. Total vectors in index: {index.ntotal}")
|
| 72 |
|
| 73 |
-
# --- 3. SYNTHETIC GENERATION ---
|
| 74 |
print("Loading generative model...")
|
| 75 |
-
|
|
|
|
| 76 |
|
| 77 |
def get_recommendations_and_generate(query_ingredients, k=3):
|
| 78 |
# 1. Get Recommendations
|
|
@@ -90,43 +95,54 @@ def get_recommendations_and_generate(query_ingredients, k=3):
|
|
| 90 |
}
|
| 91 |
results.append(recipe)
|
| 92 |
|
| 93 |
-
# 2. Generate a new recipe with a
|
| 94 |
-
prompt = f"""
|
| 95 |
-
|
| 96 |
-
### Title
|
| 97 |
-
[
|
| 98 |
-
|
| 99 |
-
### Ingredients
|
| 100 |
-
- [
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
---
|
| 109 |
-
Recipe:
|
| 110 |
-
### Title:
|
| 111 |
"""
|
| 112 |
# Generate the recipe text
|
| 113 |
-
generated_outputs = generator(prompt, max_new_tokens=
|
| 114 |
generated_text = generated_outputs[0]['generated_text'].replace(prompt, "").strip()
|
| 115 |
|
| 116 |
-
# 3.
|
| 117 |
try:
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
generated_recipe = {
|
| 122 |
-
"title":
|
| 123 |
-
"ingredients":
|
| 124 |
-
"directions":
|
| 125 |
}
|
| 126 |
-
except
|
| 127 |
-
|
| 128 |
generated_recipe = {
|
| 129 |
-
"title": "AI Generated Recipe",
|
| 130 |
"ingredients": "Could not determine ingredients.",
|
| 131 |
"directions": generated_text
|
| 132 |
}
|
|
|
|
| 6 |
from transformers import pipeline
|
| 7 |
import time
|
| 8 |
|
| 9 |
+
# --- 1. DATA LOADING AND PREPROCESSING (FINAL FIX) ---
|
| 10 |
print("===== Application Startup =====")
|
| 11 |
start_time = time.time()
|
| 12 |
|
|
|
|
| 20 |
|
| 21 |
def extract_each_feature(sample):
|
| 22 |
"""
|
| 23 |
+
Extract the recipe title, ingredients, and directions from sample['input'], cutting the directions off where a duplicated "Ingredients:" section begins.
|
| 24 |
"""
|
| 25 |
full_text = sample['input']
|
| 26 |
|
| 27 |
+
# Extract Title
|
| 28 |
title = full_text[:full_text.find("\\n")]
|
| 29 |
|
| 30 |
+
# Extract Ingredients
|
| 31 |
+
ingredients = "Not available"
|
| 32 |
+
if "Ingredients:" in full_text:
|
| 33 |
+
start_ing = full_text.find("Ingredients:") + len("Ingredients:")
|
| 34 |
+
end_ing = full_text.find("Directions:")
|
| 35 |
+
if end_ing > start_ing:
|
| 36 |
+
ingredients = full_text[start_ing:end_ing].strip()
|
| 37 |
+
|
| 38 |
+
# Extract and clean Directions
|
| 39 |
+
directions = "Not available"
|
| 40 |
+
if "Directions:" in full_text:
|
| 41 |
+
start_dir = full_text.find("Directions:") + len("Directions:")
|
| 42 |
+
# Take the text after the first "Directions:"
|
| 43 |
+
temp_directions = full_text[start_dir:].strip()
|
| 44 |
+
# If "Ingredients:" appears again, it's a duplicate. Cut it off.
|
| 45 |
+
if "Ingredients:" in temp_directions:
|
| 46 |
+
end_dir = temp_directions.find("Ingredients:")
|
| 47 |
+
directions = temp_directions[:end_dir].strip()
|
| 48 |
+
else:
|
| 49 |
+
directions = temp_directions
|
| 50 |
|
| 51 |
return {
|
| 52 |
"title": title,
|
|
|
|
| 74 |
index.add(embeddings)
|
| 75 |
print(f"Index is ready. Total vectors in index: {index.ntotal}")
|
| 76 |
|
| 77 |
+
# --- 3. SYNTHETIC GENERATION (IMPROVED MODEL AND PROMPT) ---
|
| 78 |
print("Loading generative model...")
|
| 79 |
+
# Switched back to gpt2. NOTE: gpt2 is a base language model, not instruction-tuned, so the output format relies on the prompt template below.
|
| 80 |
+
generator = pipeline('text-generation', model='gpt2')
|
| 81 |
|
| 82 |
def get_recommendations_and_generate(query_ingredients, k=3):
|
| 83 |
# 1. Get Recommendations
|
|
|
|
| 95 |
}
|
| 96 |
results.append(recipe)
|
| 97 |
|
| 98 |
+
# 2. Generate a new recipe with a structured template prompt (placeholders only; no worked examples are included)
|
| 99 |
+
prompt = f"""Given the ingredients: {query_ingredients}, create a complete recipe.
|
| 100 |
+
|
| 101 |
+
### Title ###
|
| 102 |
+
[A creative recipe title]
|
| 103 |
+
|
| 104 |
+
### Ingredients ###
|
| 105 |
+
- [List of ingredients]
|
| 106 |
+
|
| 107 |
+
### Directions ###
|
| 108 |
+
1. [First step]
|
| 109 |
+
2. [Second step]
|
| 110 |
+
3. [And so on...]
|
| 111 |
+
|
| 112 |
+
### Title ###
|
|
|
|
|
|
|
|
|
|
| 113 |
"""
|
| 114 |
# Generate the recipe text
|
| 115 |
+
generated_outputs = generator(prompt, max_new_tokens=250, num_return_sequences=1, pad_token_id=50256)
|
| 116 |
generated_text = generated_outputs[0]['generated_text'].replace(prompt, "").strip()
|
| 117 |
|
| 118 |
+
# 3. More robustly parse the generated text
|
| 119 |
try:
|
| 120 |
+
title = "AI Generated Recipe"
|
| 121 |
+
ingredients = ""
|
| 122 |
+
directions = ""
|
| 123 |
+
|
| 124 |
+
if "### Ingredients ###" in generated_text:
|
| 125 |
+
parts = generated_text.split("### Ingredients ###")
|
| 126 |
+
title = parts[0].strip()
|
| 127 |
+
rest = parts[1]
|
| 128 |
+
if "### Directions ###" in rest:
|
| 129 |
+
dir_parts = rest.split("### Directions ###")
|
| 130 |
+
ingredients = dir_parts[0].strip()
|
| 131 |
+
directions = dir_parts[1].strip()
|
| 132 |
+
else:
|
| 133 |
+
ingredients = rest.strip()
|
| 134 |
+
else:
|
| 135 |
+
directions = generated_text # Fallback
|
| 136 |
+
|
| 137 |
generated_recipe = {
|
| 138 |
+
"title": title,
|
| 139 |
+
"ingredients": ingredients,
|
| 140 |
+
"directions": directions
|
| 141 |
}
|
| 142 |
+
except Exception as e:
|
| 143 |
+
print(f"Error parsing generated text: {e}")
|
| 144 |
generated_recipe = {
|
| 145 |
+
"title": "AI Generated Recipe (Parsing Error)",
|
| 146 |
"ingredients": "Could not determine ingredients.",
|
| 147 |
"directions": generated_text
|
| 148 |
}
|