mamathew committed
Commit 6a2c742 · verified · 1 Parent(s): 54e89d0

Upload 3 files

Files changed (3)
  1. README.md +13 -6
  2. app.py +154 -0
  3. requirements.txt +10 -0
README.md CHANGED
@@ -1,12 +1,19 @@
  ---
- title: Sharif Nlp Food Rag
- emoji: 🐠
- colorFrom: green
- colorTo: blue
+ title: Food RAG Demo
+ emoji: 🍜
+ colorFrom: purple
+ colorTo: indigo
  sdk: gradio
- sdk_version: 5.45.0
  app_file: app.py
  pinned: false
+ license: apache-2.0
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ This Space demonstrates a simple **text + image retrieval** workflow against your uploaded FAISS indexes, with answers generated via the **Hugging Face Inference API** (set `HF_TOKEN` as a Space secret).
+
+ **Environment variables you can set in the Space:**
+ - `TEXT_MODEL_REPO` (default: `<your-username>/text-ft-food-rag`)
+ - `CLIP_MODEL_REPO` (default: `<your-username>/clip-ft-food-rag`)
+ - `DATASET_REPO` (default: `<your-username>/food-rag-index`)
+ - `LLM_ID` (default: `mistralai/Mistral-7B-Instruct-v0.3`)
+ - `HF_TOKEN` (set this secret in the Space to call the Inference API)
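At startup, `app.py` (added below) pulls `faiss_text.bin`, `faiss_image.bin`, `text_meta.jsonl`, and `image_meta.jsonl` from `DATASET_REPO`. The commit does not include an index-building script, so here is a minimal sketch of how the text-side index and metadata might be built and pushed to that dataset repo; the example corpus, index type, and repo IDs are placeholder assumptions, not part of this commit:

```python
# Sketch: build the text index + metadata that app.py expects and upload them
# to the dataset repo. Corpus and repo IDs below are placeholders.
import json
import faiss
from sentence_transformers import SentenceTransformer
from huggingface_hub import HfApi

# One record per FAISS row, in the same order as the metadata file.
docs = [
    {"id": "doc_0001", "title": "Pad Thai", "text": "Stir-fried rice noodles with tamarind..."},
]

enc = SentenceTransformer("<your-username>/text-ft-food-rag")
emb = enc.encode(
    [d["text"] for d in docs],
    convert_to_numpy=True,
    normalize_embeddings=True,  # app.py also normalizes queries
).astype("float32")

# Flat inner-product index: on normalized vectors this behaves like cosine similarity.
index = faiss.IndexFlatIP(emb.shape[1])
index.add(emb)
faiss.write_index(index, "faiss_text.bin")

with open("text_meta.jsonl", "w", encoding="utf-8") as f:
    for d in docs:
        f.write(json.dumps({"id": d["id"], "title": d["title"]}) + "\n")

api = HfApi()
for fname in ["faiss_text.bin", "text_meta.jsonl"]:
    api.upload_file(
        path_or_fileobj=fname,
        path_in_repo=fname,
        repo_id="<your-username>/food-rag-index",
        repo_type="dataset",
    )
```

The image side (`faiss_image.bin` plus `image_meta.jsonl`) can be built the same way with CLIP image embeddings, normalized before indexing so that inner-product scores behave like cosine similarity, which matches how `app.py` normalizes its queries.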
app.py ADDED
@@ -0,0 +1,154 @@
+
+ # Simple HF Space to test your RAG + image/text search with your Hub models.
+ # Move this file (and requirements.txt + README.md) into a new Space.
+ import os, json
+ from dataclasses import dataclass
+ from typing import List, Optional, Tuple
+
+ import gradio as gr
+ import numpy as np
+ import faiss
+ from PIL import Image
+
+ from huggingface_hub import snapshot_download
+ from sentence_transformers import SentenceTransformer
+ import torch
+ from transformers import CLIPModel, CLIPProcessor
+
+ # ========== CONFIG (edit to your repos) ==========
+ TEXT_MODEL_REPO = os.environ.get("TEXT_MODEL_REPO", "<your-username>/text-ft-food-rag")
+ CLIP_MODEL_REPO = os.environ.get("CLIP_MODEL_REPO", "<your-username>/clip-ft-food-rag")
+ DATASET_REPO = os.environ.get("DATASET_REPO", "<your-username>/food-rag-index")
+ # LLM via Inference API (set HF_TOKEN in Space secrets). Change to your preferred instruct model.
+ LLM_ID = os.environ.get("LLM_ID", "mistralai/Mistral-7B-Instruct-v0.3")
+ # =================================================
+
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # ---- Download dataset snapshot (FAISS + metas + optionally images/) ----
+ DATA_DIR = snapshot_download(repo_id=DATASET_REPO, repo_type="dataset")
+
+ # Expected files inside DATA_DIR:
+ #   faiss_text.bin, faiss_image.bin, text_meta.jsonl, image_meta.jsonl
+ #   images/ (optional) if you want to show pictures next to results
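+ # (Illustrative only, not defined by this commit) One plausible shape for the metadata
+ # records, assuming both .jsonl files are aligned row-for-row with their FAISS indexes:
+ #   text_meta.jsonl : {"id": "doc_0001", "title": "Pad Thai", "text": "Stir-fried rice noodles..."}
+ #   image_meta.jsonl: {"id": "doc_0001", "image_path": "images/doc_0001.jpg"}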
+
+ def read_jsonl(path: str):
+     out = []
+     with open(path, "r", encoding="utf-8") as f:
+         for line in f:
+             line = line.strip()
+             if line:
+                 out.append(json.loads(line))
+     return out
+
+ # Load metas & FAISS
+ TEXT_META = read_jsonl(os.path.join(DATA_DIR, "text_meta.jsonl"))
+ IMAGE_META = read_jsonl(os.path.join(DATA_DIR, "image_meta.jsonl"))
+ T_INDEX = faiss.read_index(os.path.join(DATA_DIR, "faiss_text.bin"))
+ I_INDEX = faiss.read_index(os.path.join(DATA_DIR, "faiss_image.bin"))
+
+ # Load encoders
+ text_enc = SentenceTransformer(TEXT_MODEL_REPO, device=DEVICE)
+ clip_model = CLIPModel.from_pretrained(CLIP_MODEL_REPO).to(DEVICE)
+ clip_proc = CLIPProcessor.from_pretrained(CLIP_MODEL_REPO)
+
+ # Optional: LLM via HF Inference API (so Spaces don't need to run an LLM locally)
+ try:
+     from huggingface_hub import InferenceClient
+     HF_TOKEN = os.environ.get("HF_TOKEN")  # set this in Space -> Settings -> Repository secrets
+     client = InferenceClient(model=LLM_ID, token=HF_TOKEN)
+ except Exception:
+     client = None
+
+ @dataclass
+ class Pair:
+     rank: int
+     idx: int
+     doc_id: str
+     title: Optional[str]
+     score: float
+     image_path: Optional[str]
+
+ def _pair_from_idx(idx: int, score: float, rank: int) -> Pair:
+     m = TEXT_META[idx]
+     img_path = IMAGE_META[idx].get("image_path")
+     return Pair(rank=rank, idx=idx, doc_id=m.get("id"), title=m.get("title"), score=float(score), image_path=img_path)
+
+ def search_text(q: str, topk: int = 10) -> List[Pair]:
+     qv = text_enc.encode([q], convert_to_numpy=True, normalize_embeddings=True).astype("float32")
+     D, I = T_INDEX.search(qv, topk)
+     out = []
+     for r, (i, s) in enumerate(zip(I[0].tolist(), D[0].tolist()), start=1):
+         if i < 0:
+             continue
+         out.append(_pair_from_idx(i, s, r))
+     return out
+
+ def search_image(img: Image.Image, topk: int = 10) -> List[Pair]:
+     inputs = clip_proc(images=[img.convert("RGB")], return_tensors="pt").to(DEVICE)
+     with torch.no_grad():
+         qv = clip_model.get_image_features(**inputs)
+     qv = torch.nn.functional.normalize(qv, dim=1).float().cpu().numpy().astype(np.float32)
+     D, I = I_INDEX.search(qv, topk)
+     out = []
+     for r, (i, s) in enumerate(zip(I[0].tolist(), D[0].tolist()), start=1):
+         if i < 0:
+             continue
+         out.append(_pair_from_idx(i, s, r))
+     return out
+
+ def build_prompt(question: str, ctx: List[Pair]) -> str:
+     lines = ["You are a helpful assistant. Answer the user's question using the given context.",
+              "If the answer is not contained in the context, say you don't know.\n",
+              "Context:"]
+     for p in ctx:
+         lines.append(f"- {p.title or ''} (id={p.doc_id}) [score={p.score:.3f}]")
+     lines.append(f"\nQuestion: {question}\nAnswer:")
+     return "\n".join(lines)
+
+ def call_llm(prompt: str) -> str:
+     if client is None:
+         return "(LLM not configured)\n\n" + prompt
+     try:
+         out = client.text_generation(prompt=prompt, max_new_tokens=256, temperature=0.2, do_sample=True)
+         return out.strip()
+     except Exception as e:
+         return f"(LLM error: {e})\n\n" + prompt
+
+ def display_gallery(pairs: List[Pair]) -> List[Tuple[str, str]]:
+     # Return [(image_path, caption), ...] for the Gradio Gallery. Works if an images/ folder is included.
+     items = []
+     for p in pairs:
+         if p.image_path:
+             local_path = os.path.join(DATA_DIR, p.image_path) if not os.path.isabs(p.image_path) else p.image_path
+             if os.path.exists(local_path):
+                 caption = f"#{p.rank} — {p.title or ''}\nscore={p.score:.3f}"
+                 items.append((local_path, caption))
+     return items
+
+ def answer(question: str, image: Optional[Image.Image], topk: int, k_ctx: int, use_image: bool):
+     if use_image and image is not None:
+         top = search_image(image, topk=topk)
+     else:
+         top = search_text(question, topk=topk)
+     ctx = top[:max(1, k_ctx)]
+     prompt = build_prompt(question, ctx)
+     gen = call_llm(prompt)
+     gal = display_gallery(top)
+     return gen, [[p.rank, p.title or "", f"{p.score:.3f}", p.doc_id] for p in top], gal
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# 🍜 Food RAG Demo (text + image search)")
+     with gr.Row():
+         q = gr.Textbox(label="Question", placeholder="Ask something about a dish, ingredient, etc.")
+         img = gr.Image(label="Optional image", type="pil")
+     with gr.Row():
+         topk = gr.Slider(1, 20, value=10, step=1, label="Top-K search")
+         kctx = gr.Slider(1, 10, value=4, step=1, label="K context to LLM")
+         use_img = gr.Checkbox(label="Use image for search", value=False)
+     btn = gr.Button("Run")
+     out_text = gr.Textbox(label="Answer")
+     out_table = gr.Dataframe(headers=["Rank", "Title", "Score", "Doc ID"], label="Top-K retrieval")
+     out_gallery = gr.Gallery(label="Matches (if images available)", columns=5, height=200)
+     btn.click(answer, inputs=[q, img, topk, kctx, use_img], outputs=[out_text, out_table, out_gallery])
+
+ if __name__ == "__main__":
+     demo.launch()
requirements.txt ADDED
@@ -0,0 +1,10 @@
+
+ gradio>=4.0.0
+ huggingface_hub>=0.24.0
+ transformers>=4.43.0
+ sentence-transformers>=3.0.0
+ torch
+ torchvision
+ pillow
+ faiss-cpu
+ numpy
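The header comment in `app.py` says to move these three files into a new Space. A sketch of doing that with the `huggingface_hub` Python API; the Space name below is a placeholder and you must be logged in (`huggingface-cli login`) first:

```python
# Sketch: create a Gradio Space and push the three files from this commit into it.
from huggingface_hub import HfApi, create_repo

repo_id = "<your-username>/food-rag-demo"  # placeholder Space name
create_repo(repo_id, repo_type="space", space_sdk="gradio", exist_ok=True)

api = HfApi()
for fname in ["app.py", "requirements.txt", "README.md"]:
    api.upload_file(
        path_or_fileobj=fname,
        path_in_repo=fname,
        repo_id=repo_id,
        repo_type="space",
    )
```

Remember to add `HF_TOKEN` under Settings -> Repository secrets in the new Space so the Inference API call in `call_llm` works.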