File size: 3,690 Bytes
a28f647
 
1288a7e
 
a28f647
f31e6c0
1288a7e
 
 
f31e6c0
 
 
 
 
1288a7e
f31e6c0
 
 
1288a7e
 
f31e6c0
1288a7e
 
 
a28f647
1288a7e
 
 
 
 
 
 
 
f31e6c0
 
 
 
 
 
 
1288a7e
 
 
a28f647
 
 
 
 
 
 
 
 
1288a7e
 
 
 
 
 
 
f31e6c0
 
 
1288a7e
 
 
f31e6c0
1288a7e
 
 
 
f31e6c0
 
 
1288a7e
 
 
 
 
 
 
 
 
a28f647
 
 
1288a7e
a28f647
 
 
 
 
 
1288a7e
a28f647
 
 
f31e6c0
a28f647
 
 
1288a7e
 
 
 
a28f647
 
1288a7e
a28f647
1288a7e
 
 
f31e6c0
 
a28f647
 
 
1288a7e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import gradio as gr
from huggingface_hub import InferenceClient
from datasets import load_dataset
import pandas as pd

# --- Load and Inspect the Indian Recipes Dataset ---
dataset = load_dataset("nf-analyst/indian_recipe")
recipes = dataset["train"].to_pandas()

# Print column names so a schema mismatch is easy to spot in the startup logs.
print("Available columns:", recipes.columns.tolist())

# --- Adjusted Preprocessing ---
# Build one free-text field per recipe for embedding. The column names below
# must match the dataset's actual columns (check the print output above).
#
# Missing values are replaced with "" and every field is coerced to str before
# concatenation: adding a missing value to a string column propagates the
# missing value, which would otherwise leave a non-string entry in the list
# that is later handed to the sentence encoder.
recipes["search_text"] = (
    recipes["RecipeName"].fillna("").astype(str) + " " +
    recipes["TranslatedIngredients"].fillna("").astype(str) + " " +
    recipes["TranslatedInstructions"].fillna("").astype(str)
)

# --- Semantic Search Setup ---
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Embed every recipe's search text once at import time, then load the vectors
# into a flat L2 index whose dimensionality is taken from the embedding matrix.
model = SentenceTransformer("all-MiniLM-L6-v2")
_corpus_texts = recipes["search_text"].tolist()
embeddings = model.encode(_corpus_texts)
_embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(_embedding_dim)
index.add(embeddings)

def search_recipes(query, top_k=3):
    """Return the recipes whose embedded search text is nearest to *query*.

    The query is encoded with the module-level ``model`` and looked up in the
    module-level FAISS ``index``; hits are mapped back to rows of ``recipes``.

    Args:
        query: Free-text search string.
        top_k: Maximum number of recipes to return.

    Returns:
        A list of at most ``top_k`` dicts with the keys ``name``,
        ``ingredients``, ``instructions``, ``cuisine`` and ``course``, in the
        order returned by the index search. The list is empty when ``top_k``
        is not positive or the index yields no valid hits.
    """
    if top_k < 1:
        return []

    query_embedding = model.encode([query])
    _, indices = index.search(query_embedding, top_k)

    results = []
    for i in indices[0]:
        # FAISS pads the result with -1 when fewer than top_k vectors are
        # available; iloc[-1] would silently return the *last* recipe instead.
        if i < 0:
            continue
        row = recipes.iloc[int(i)]  # one row lookup instead of one per field
        results.append({
            "name": row["RecipeName"],
            "ingredients": row["TranslatedIngredients"],
            "instructions": row["TranslatedInstructions"],
            "cuisine": row["Cuisine"],
            "course": row["Course"],
        })
    return results

# --- Modified Respond Function ---
# Inference client bound to the chat model; respond() streams completions from it.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat reply that is grounded in recipes retrieved for *message*.

    The recipes returned by ``search_recipes(message)`` are rendered into a
    system prompt, the prior conversation is replayed, and the model's answer
    is streamed back as a progressively growing string.

    Args:
        message: The user's latest chat message; also used as the search query.
        history: Previous turns. Each entry is either a ``(user, assistant)``
            pair or a dict with ``role`` and ``content`` keys; empty parts are
            skipped.
        system_message: Accepted because the UI passes it, but not used: the
            system prompt is built from the retrieved recipes instead.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The accumulated response text after each non-empty streamed token, or
        a single fallback message when no recipes were found.
    """
    found_recipes = search_recipes(message)

    if not found_recipes:
        yield "No matching Indian recipes found. Try terms like 'butter chicken', 'biryani', or 'dal tadka'."
        return

    # str() guards the slice against a non-string (e.g. missing) instructions
    # value, which would otherwise raise TypeError on [:200].
    recipe_context = "\n\n".join([
        f"Recipe {i+1}: {r['name']} ({r['cuisine']}, {r['course']})\n"
        f"Ingredients: {r['ingredients']}\n"
        f"Method: {str(r['instructions'])[:200]}..."
        for i, r in enumerate(found_recipes)
    ])

    strict_system_prompt = f"""You are an Indian food expert. ONLY recommend from these verified recipes:
    
    {recipe_context}
    
    Respond in this format:
    1. First list matching recipe names
    2. Only provide details when explicitly asked
    3. Never invent recipes or ingredients"""

    messages = [{"role": "system", "content": strict_system_prompt}]

    for turn in history:
        if isinstance(turn, dict):
            # Role/content style entry: pass plain-text user/assistant turns
            # through unchanged instead of failing on tuple unpacking.
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
            continue

        user_msg, bot_msg = turn
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})

    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # A stream event may carry no choices, or a delta whose content is
        # None; appending None to the string would raise TypeError mid-stream.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if not token:
            continue
        response += token
        yield response

# --- Gradio Interface ---
# Chat UI wired to respond(). The additional inputs are listed in the same
# order as respond()'s parameters after message/history: system message,
# max tokens, temperature, top-p.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(label="System message", value="You are an expert on Indian cuisine."),
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(label="Top-p", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
    ],
    examples=[
        "Vegetarian North Indian dinner",
        "Quick chicken curry",
        "Traditional South Indian breakfast",
    ],
)

# Only start the web server when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()