File size: 4,324 Bytes
c6b5dab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import gradio as gr
from huggingface_hub import InferenceClient
from datasets import load_dataset
import pandas as pd

# --- Load the Indian Recipes Dataset ---
# Load the dataset and turn its "train" split into a pandas DataFrame.
dataset = load_dataset("nf-analyst/indian_recipe")
recipes = dataset["train"].to_pandas()

# Preprocess: build one searchable string per recipe made of the name, the
# ingredients and the instructions, separated by single spaces.
# NOTE(review): assumes "ingredients" and "instructions" hold sequences of
# strings (each cell is space-joined element by element) — confirm against
# the dataset schema.
recipes["search_text"] = (
    recipes["name"]
    + " "
    + recipes["ingredients"].apply(" ".join)
    + " "
    + recipes["instructions"].apply(" ".join)
)

# --- Enhanced Search Function (Semantic + Keyword) ---
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Initialize semantic search
# Runs once at import time: every recipe's search_text is encoded with the
# sentence-transformer model and the resulting vectors are added to a FAISS
# IndexFlatL2 whose dimension is taken from the embeddings' second axis.
# search_recipes() reads both `model` and `index`.
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(recipes["search_text"].tolist())
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

def search_recipes(query, top_k=3):
    """Find recipes near *query* in embedding space that also contain it verbatim.

    The query is embedded with the module-level ``model`` and the ``top_k``
    nearest rows are requested from the module-level FAISS ``index``.  A hit
    is kept only when the lower-cased query occurs as a substring of that
    recipe's lower-cased ``search_text``.

    Args:
        query: Free-text search string; surrounding whitespace is ignored.
        top_k: Number of nearest neighbours to request from the index.

    Returns:
        A list of dicts with the keys ``name``, ``ingredients``,
        ``instructions``, ``cook_time`` and ``diet`` — one per surviving
        hit, in the order the index returned them — or ``None`` when nothing
        survives (which includes a blank query or ``top_k`` below 1).
    """
    cleaned = query.strip() if query else ""
    if not cleaned or top_k < 1:
        # A blank needle is a substring of every text, so it would "verify"
        # whatever the index happens to return; report no match instead.
        return None
    needle = cleaned.lower()  # hoisted: invariant across the loop below

    # Semantic search first
    query_embedding = model.encode([cleaned])
    _distances, indices = index.search(query_embedding, top_k)

    # Keyword verification
    # NOTE(review): the whole query must appear verbatim in the recipe text,
    # so multi-word queries rarely pass — consider token-level matching.
    results = []
    for i in indices[0]:
        if i < 0:
            # FAISS pads the result with -1 when it has fewer than top_k
            # neighbours; iloc[-1] would silently select the last recipe.
            continue
        recipe = recipes.iloc[i]
        if needle in recipe["search_text"].lower():
            results.append({
                "name": recipe["name"],
                "ingredients": recipe["ingredients"],
                "instructions": recipe["instructions"],
                "cook_time": recipe["cook_time"],
                "diet": recipe["diet"],
            })
    return results if results else None

# --- Modified Respond Function ---
# Inference client for the zephyr-7b-beta model; respond() streams chat
# completions through it.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a reply to *message* that is restricted to retrieved recipes.

    Recipes are looked up with ``search_recipes(message)``.  When none are
    found a fixed hint is yielded and the generator ends.  Otherwise the
    recipes are formatted into a system prompt, the prior conversation and
    the new message are appended, and the model's streamed completion is
    relayed.

    Args:
        message: The user's latest chat message.
        history: Earlier turns as ``(user_message, bot_message)`` pairs;
            empty/falsy halves of a pair are skipped.
        system_message: Accepted so the signature matches the inputs the UI
            supplies, but not used — the system prompt is always rebuilt
            from the retrieved recipes.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The reply text accumulated so far, once for every streamed chunk
        that carries text; or the single "no matching recipes" hint.
    """
    # Search our dataset first
    found_recipes = search_recipes(message)

    if not found_recipes:
        yield "No matching Indian recipes found. Try terms like 'butter chicken', 'biryani', or 'dal tadka'."
        return

    # Format recipes as strict context (only the first three instruction
    # steps of each recipe are included).
    recipe_context = "\n\n".join(
        f"Recipe {number}: {r['name']} ({r['diet']}, {r['cook_time']})\n"
        f"Ingredients: {', '.join(r['ingredients'])}\n"
        f"Method: {' '.join(r['instructions'][:3])}..."
        for number, r in enumerate(found_recipes, start=1)
    )

    # Force the LLM to only use these recipes
    strict_system_prompt = f"""You are an Indian food expert. ONLY recommend from these verified recipes.
    NEVER invent recipes. If asked for variations, suggest only minor modifications to these:
    
    {recipe_context}
    
    Respond in this format:
    1. First recommend recipe names matching the query
    2. If asked for details, provide ONLY from the recipes above
    3. For substitutions, suggest similar ingredients from these recipes
    """

    messages = [{"role": "system", "content": strict_system_prompt}]

    # Add conversation history
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})

    messages.append({"role": "user", "content": message})

    # Generate response
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some streamed chunks carry no text (empty choices, or a delta whose
        # content is None, e.g. the terminating chunk); concatenating None
        # would raise TypeError, so skip them.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if not token:
            continue
        response += token
        yield response

# --- Gradio Interface with Indian Food Examples ---
# Chat UI around respond().  The additional inputs below line up, in order,
# with respond()'s system_message, max_tokens, temperature and top_p
# parameters.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are an expert on Indian cuisine.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
    # When additional_inputs are present, every example has to be a row:
    # the message followed by one value per additional input (a flat list of
    # strings is only valid for a single input component).  Each prompt is
    # therefore paired with the default values declared above — keep the two
    # in sync.
    examples=[
        [prompt, "You are an expert on Indian cuisine.", 512, 0.7, 0.95]
        for prompt in (
            "Vegetarian North Indian dinner",
            "Quick chicken curry",
            "Traditional South Indian breakfast",
            "Gluten-free Indian dessert",
        )
    ],
    # U+1F35B (CURRY AND RICE), written as an escape so the emoji survives
    # re-encoding; the literal had been garbled into mojibake.
    title="\U0001F35B Authentic Indian Recipe Assistant",
    description="Get recommendations ONLY from verified Indian recipes",
)

# Start the Gradio app only when this file is run as a script, not when it
# is imported.
if __name__ == "__main__":
    demo.launch()