Liori25 committed on
Commit
e48456e
·
verified ·
1 Parent(s): aacb665

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -0
app.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import pickle
4
+ import torch
5
+ from sentence_transformers import SentenceTransformer, util
6
+ from IO_pipeline import RecipeDigitalizerPipeline # Importing your image pipeline
7
+
8
# ==========================================
# 1. LOAD RESOURCES (Run once on startup)
# ==========================================
print("⏳ Loading Models and Datasets...")

# A. Load the Sentence Transformer model used to embed each NEW recipe.
model_name = 'BAAI/bge-small-en-v1.5'
embedding_model = SentenceTransformer(model_name)

# B. Load the pre-computed embeddings.
# NOTE(security): pickle.load can execute arbitrary code while unpickling.
# Only ship a 'recipe_embeddings.pkl' that was produced by a trusted process.
with open('recipe_embeddings.pkl', 'rb') as f:
    data_bundle = pickle.load(f)

# The bundle is read as a mapping with an 'embeddings' entry.
# If only a dataframe was saved, adjust to:
#     stored_embeddings = data_bundle['embedding'].tolist()
stored_embeddings = data_bundle['embeddings']

# C. Load the CSV dataset (used to display recipe details for each match).
df_recipes = pd.read_csv('RecipeData_10K.csv')

# Search results are mapped back to CSV rows by position, so a size mismatch
# would silently show the wrong recipe (or raise an IndexError later).
if len(stored_embeddings) != len(df_recipes):
    print(
        f"WARNING: {len(stored_embeddings)} embeddings but "
        f"{len(df_recipes)} recipes; search results may be misaligned."
    )

print("✅ Resources Loaded Successfully!")
27
+
28
+
29
+ # ==========================================
30
+ # 2. CORE FUNCTIONS
31
+ # ==========================================
32
+
33
def _as_items(value):
    """Normalize a recipe field to a list of items (None -> [], str -> [str])."""
    if value is None:
        return []
    if isinstance(value, (str, bytes)):
        # A bare string is one item, not a sequence of characters.
        return [value] if value else []
    try:
        return list(value)
    except TypeError:
        return [value]


def format_recipe_text(json_data):
    """Convert the digitizer's JSON output into display text and a search query.

    Args:
        json_data: Mapping that may contain "title", "cuisine_type",
            "difficulty", "ingredients" and "instructions", or an "error"
            entry describing a failed extraction.

    Returns:
        A ``(display_text, search_query)`` tuple of strings. When the input
        is not a mapping or carries an "error" entry, ``display_text`` is an
        error message and ``search_query`` is the empty string.
    """
    if not isinstance(json_data, dict):
        return "Error: no recipe data was extracted from the image.", ""
    if "error" in json_data:
        return f"Error: {json_data['error']}", ""

    # `or` (rather than a .get default) also covers keys present with a
    # null/empty value, which would otherwise be rendered as "None".
    title = json_data.get("title") or "Unknown Recipe"
    cuisine = json_data.get("cuisine_type") or "General"
    difficulty = json_data.get("difficulty") or "Medium"

    ingredients = "\n".join(
        f"- {item}" for item in _as_items(json_data.get("ingredients"))
    )
    instructions = "\n".join(
        f"{number}. {step}"
        for number, step in enumerate(_as_items(json_data.get("instructions")), start=1)
    )

    # 1. Readable text block shown to the user.
    display_text = (
        f"🍽️ RECIPE: {title}\n"
        f"================================\n"
        f"🌍 Cuisine: {cuisine}\n"
        f"⚡ Difficulty: {difficulty}\n\n"
        f"🛒 INGREDIENTS:\n{ingredients}\n\n"
        f"🍳 INSTRUCTIONS:\n{instructions}"
    )

    # 2. Search query (plain text handed to the embedding model).
    search_query = f"{title} {cuisine} {ingredients} {instructions}"

    return display_text, search_query
60
+
61
def find_similar_recipes(user_query_text, top_k=3):
    """Embed the user's recipe text and describe its closest stored recipes.

    Uses the module-level ``embedding_model``, ``stored_embeddings`` and
    ``df_recipes``; matches are mapped back to ``df_recipes`` by row position.

    Args:
        user_query_text: Plain-text description of the recipe to match.
        top_k: Maximum number of matches to report. Defaults to 3 and is
            capped at the number of stored embeddings.

    Returns:
        A display string with one block (score, title, short description)
        per match, or a short notice when there is nothing to compare against.
    """
    # 1. Embed the new recipe text, prefixed with a retrieval instruction.
    instruction = "Represent this recipe for retrieving similar dishes: "
    query_embedding = embedding_model.encode(
        instruction + user_query_text, convert_to_tensor=True
    )

    # 2. Compute cosine similarity.
    # as_tensor avoids a needless copy when the data is already a tensor or
    # ndarray; moving it to the query's device/dtype prevents a mismatch when
    # the model runs on GPU or the stored vectors use another float width.
    corpus_embeddings = torch.as_tensor(stored_embeddings).to(
        device=query_embedding.device, dtype=query_embedding.dtype
    )
    if corpus_embeddings.numel() == 0:
        return "No recipes are available to compare against."

    cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]

    # 3. Take the best matches; topk raises if k exceeds the corpus size.
    k = max(0, min(int(top_k), cos_scores.shape[0]))
    top_results = torch.topk(cos_scores, k=k)

    has_description = 'Raw_Output' in df_recipes.columns
    blocks = []
    for score, idx in zip(top_results.values.tolist(), top_results.indices.tolist()):
        row = df_recipes.iloc[idx]
        rec_title = row['Title']

        # Missing CSV cells are read as NaN (a float), which has no len().
        rec_desc = row['Raw_Output'] if has_description else None
        rec_desc = "No description available." if pd.isna(rec_desc) else str(rec_desc)

        # Truncate long descriptions for display.
        rec_desc_short = rec_desc[:200] + "..." if len(rec_desc) > 200 else rec_desc

        blocks.append(
            f"🏆 MATCH SCORE: {score:.2f}\n"
            f"📌 {rec_title}\n"
            f"📝 {rec_desc_short}\n"
            f"--------------------------------------------------\n"
        )

    return "".join(blocks)
97
+
98
def magic_pipeline(image_path):
    """Digitize a recipe image and look up similar recipes.

    Args:
        image_path: Path of the uploaded image file; falsy (``None`` or
            empty) when nothing was uploaded.

    Returns:
        A ``(digitized_text, recommendations_text)`` tuple of strings, in
        the order of the two output textboxes wired to the click event.
    """
    # The button can be clicked before an image is uploaded; do not hand a
    # missing path to the digitizer.
    if not image_path:
        return "Please upload a recipe image first.", ""

    # Step 1: Image -> structured data (using the imported IO_pipeline).
    digitizer = RecipeDigitalizerPipeline()
    json_result = digitizer.run_pipeline(image_path)

    # Step 2: Format the text for the user.
    readable_text, query_text = format_recipe_text(json_result)

    # Step 3: Find similar recipes (only if there is text to search with).
    if not query_text:
        return readable_text, "Could not search for similar recipes due to extraction error."

    similar_recipes_text = find_similar_recipes(query_text)

    return readable_text, similar_recipes_text
113
+
114
+
115
# ==========================================
# 3. GRADIO UI LAYOUT
# ==========================================

# NOTE(review): "#component-0" looks like an auto-generated Gradio element id;
# confirm it still targets the intended container after layout changes.
custom_css = """
#component-0 {max-width: 800px; margin: auto;}
"""

with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:

    gr.Markdown("# 👵 Legacy Kitchen")
    gr.Markdown("Upload a photo of your handwritten family recipe. We will digitize it and find similar recipes from our database!")

    with gr.Row():
        with gr.Column():
            # type="filepath" hands magic_pipeline a path string, not pixels.
            input_image = gr.Image(type="filepath", label="Upload Recipe Image")
            submit_btn = gr.Button("✨ Digitize & Find Similar", variant="primary")

        with gr.Column():
            # Section 3 & 4: Output text (digitized recipe)
            output_digitized = gr.Textbox(label="📖 Digitized Recipe", lines=10)

            # Section 5: Similar recipes output
            output_recommendations = gr.Textbox(label="🥗 3 Similar Recipes Found", lines=10)

    # Click event: the two return values of magic_pipeline fill the two
    # textboxes in the order listed in `outputs`.
    submit_btn.click(
        fn=magic_pipeline,
        inputs=input_image,
        outputs=[output_digitized, output_recommendations]
    )

# Launch App
if __name__ == "__main__":
    demo.launch()