Remostart committed on
Commit
815b4f4
·
1 Parent(s): fef97aa

Initial commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ plutus_recommend_index.faiss filter=lfs diff=lfs merge=lfs -text
37
+ tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# Runtime image for the Plutus Learner API (FastAPI served by uvicorn).
FROM python:3.10-slim

# System packages needed at build/run time. The apt package lists are removed
# in the same layer so they do not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    g++ \
    wget \
    curl \
    git \
    libfaiss-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy the requirements file on its own first so the dependency layer is only
# rebuilt when requirements.txt changes, not on every source edit.
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 7860

# Run a single worker. app/main.py keeps the last /generate result in an
# in-process object (GEN_CACHE) that /recommend and /summary read back; with
# several worker processes a follow-up request can land on a worker that never
# saw the /generate call and is answered with HTTP 400. Each extra worker would
# also load its own copy of both language models.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
app/__pycache__/main.cpython-312.pyc ADDED
Binary file (8.44 kB). View file
 
app/__pycache__/model.cpython-312.pyc ADDED
Binary file (11.5 kB). View file
 
app/__pycache__/recommender.cpython-312.pyc ADDED
Binary file (10.5 kB). View file
 
app/main.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/main.py
2
+
3
+ import os
4
+ import json
5
+ import logging
6
+ from typing import Optional
7
+ import asyncio
8
+ import uvicorn
9
+ from fastapi import FastAPI, HTTPException
10
+ from fastapi.responses import JSONResponse, StreamingResponse
11
+ from pydantic import BaseModel
12
+ import torch
13
+
14
+ from .model import PlutusModel, SummaryModel
15
+ from .recommender import Recommender
16
+
17
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("plutus.api")

# Directory that holds the recommender artifacts. It is taken from the HF_HOME
# environment variable and falls back to /home/user/app when that is unset.
_CACHE_DIR = os.getenv("HF_HOME", "/home/user/app")

# Path of the topic -> resources catalogue.
# The earlier code called os.getenv(_CACHE_DIR, ...), i.e. it used the *value*
# of _CACHE_DIR as the variable name, so in practice the lookup never matched
# and the path could not be configured. RECOMMEND_JSON is the override now; the
# default ("recommend.json", resolved against the working directory) is the
# value the old code always ended up with.
DEFAULT_RECOMMEND_JSON = os.getenv("RECOMMEND_JSON", "recommend.json")
RECOMMEND_INDEX_PATH = os.path.join(_CACHE_DIR, "plutus_recommend_index.faiss")
RECOMMEND_META_PATH = os.path.join(_CACHE_DIR, "plutus_recommend_meta.json")
25
+
26
+
27
class GenerateCache:
    """Single-slot holder for the inputs and output of the latest /generate call.

    The /recommend and /summary handlers read these fields back instead of
    taking the query again. There is one slot per process, so it always
    reflects the most recent /generate request handled by this process.
    """

    # Every slot starts as None, which the handlers treat as "no /generate yet".
    last_query: Optional[str] = None
    last_topic: Optional[str] = None
    last_personality: Optional[str] = None
    last_level: Optional[str] = None
    last_output: Optional[str] = None
33
+
34
+
35
# Process-wide slot that /generate fills and /recommend and /summary read.
GEN_CACHE = GenerateCache()

logger.info("Loading PlutusModel + Recommender...")

# The objects below are constructed once, at import time, and shared by every
# request handler in this module.
plutus_model = PlutusModel()

recommender = Recommender(
    DEFAULT_RECOMMEND_JSON,
    index_path=RECOMMEND_INDEX_PATH,
    meta_path=RECOMMEND_META_PATH,
)

summary_model_wrapper = SummaryModel(model_name="Qwen/Qwen3-4B-Instruct-2507")

app = FastAPI(title="Plutus Learner API")
50
+
51
+
52
+
53
+
54
class GenerateRequest(BaseModel):
    """JSON body accepted by POST /generate."""

    # Learner profile and subject; all three are passed to create_prompt.
    personality: str
    level: str
    topic: str
    # The learner's question. It is passed to create_prompt as the additional
    # context and is later reused by /recommend and /summary.
    query: str
    # Generation settings forwarded unchanged to PlutusModel.generate.
    max_new_tokens: int = 700
    temperature: float = 0.4
    top_p: float = 0.5
62
+
63
+
64
class RecommendRequest(BaseModel):
    """JSON body accepted by POST /recommend."""

    # Number of recommendations requested from the recommender.
    top_k: int = 5
66
+
67
+
68
class SummaryRequest(BaseModel):
    """JSON body accepted by POST /summary."""

    # Number of recommended resources to attach to the summary.
    top_k: int = 5
70
+
71
+
72
+
73
+
74
@app.get("/health")
async def health():
    """Report that the service is up, plus the device string of the main model."""
    payload = {"status": "ok", "device": plutus_model.device}
    return payload
77
+
78
+
79
+
80
+
81
+
82
@app.post("/generate")
async def generate(req: GenerateRequest):
    """Produce a teaching answer for the request and remember it for follow-ups.

    Builds the prompt from the request fields, runs the main model, stores the
    request and the generated text in GEN_CACHE, and returns the text together
    with the topic and query it belongs to.
    """
    teaching_prompt = plutus_model.create_prompt(
        req.personality, req.level, req.topic, req.query
    )

    answer = plutus_model.generate(
        teaching_prompt,
        max_new_tokens=req.max_new_tokens,
        temperature=req.temperature,
        top_p=req.top_p,
    )

    # Keep the request and its answer so /recommend and /summary can reuse
    # them without the client sending them again.
    GEN_CACHE.last_query = req.query
    GEN_CACHE.last_topic = req.topic
    GEN_CACHE.last_personality = req.personality
    GEN_CACHE.last_level = req.level
    GEN_CACHE.last_output = answer

    response = {"text": answer, "topic": req.topic, "query": req.query}
    return response
112
+
113
+
114
+
115
+
116
+
117
@app.get("/stream_generate")
async def stream_generate(personality: str, level: str, topic: str, query: str = ""):
    """Stream a teaching answer as server-sent events.

    Args:
        personality: Learner personality, inserted into the prompt.
        level: Learner skill level, inserted into the prompt.
        topic: Topic to teach.
        query: Optional learner question, passed to create_prompt as the
            additional context in the same way /generate does. It defaults to
            an empty string, which reproduces the earlier behaviour of this
            endpoint (no question in the prompt).

    Returns:
        A text/event-stream response. Each event is a JSON object with a
        single "chunk" key. PlutusModel.stream_generate yields the text
        accumulated so far, so every event carries the whole answer up to that
        point rather than only the newest piece.
    """
    prompt = plutus_model.create_prompt(personality, level, topic, query)

    async def generate_events():
        for chunk in plutus_model.stream_generate(prompt):
            yield f"data: {json.dumps({'chunk': chunk})}\n\n"
            # Brief pause between events so other coroutines get a turn.
            await asyncio.sleep(0.01)

    return StreamingResponse(generate_events(), media_type="text/event-stream")
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
@app.post("/recommend")
async def recommend(req: RecommendRequest):
    """Return learning resources matching the query of the last /generate call.

    Raises:
        HTTPException: 400 when no /generate call has been recorded yet.
    """
    last_query = GEN_CACHE.last_query
    if last_query is None:
        raise HTTPException(400, "No query found. Call /generate first.")

    hits = recommender.recommend_for_query(
        query=last_query,
        top_k=req.top_k,
        topic_boost=GEN_CACHE.last_topic,
    )

    # Expose only the public fields; the similarity score stays internal.
    cleaned = []
    for hit in hits:
        cleaned.append({"topic": hit["topic"], "type": hit["type"], "url": hit["url"]})

    return {"query": last_query, "results": cleaned}
155
+
156
+
157
+
158
+
159
+
160
@app.post("/summary")
async def summary(req: SummaryRequest):
    """Summarize the last /generate answer and attach recommended resources.

    Raises:
        HTTPException: 400 when no /generate output has been recorded yet.
    """
    if GEN_CACHE.last_output is None:
        raise HTTPException(400, "No generate output found. Call /generate first.")

    # Resources related to the cached query, with the cached topic boosted.
    recs = recommender.recommend_for_query(
        query=GEN_CACHE.last_query,
        top_k=req.top_k,
        topic_boost=GEN_CACHE.last_topic,
    )

    # One bullet per resource, e.g. "- (video) https://...".
    formatted_resources_for_llm = "\n".join(
        f"- ({rec['type']}) {rec['url']}" for rec in recs
    )

    # The two templates below are written flush-left because their text is
    # sent to the summary model exactly as it appears here.
    readable_resource_block = f"""
Here are some helpful resources for further learning:
{formatted_resources_for_llm}
"""

    full_input_text = f"""
Summarize the explanation below in a clear, simple, structured way.

Your summary must include:
1. A clean explanation of the topic
2. A study roadmap
3. A friendly explanation of the recommended learning resources (videos, docs)

---- MAIN CONTENT ----
{GEN_CACHE.last_output}

---- RECOMMENDED RESOURCES ----
{readable_resource_block}
"""

    # The raw recommendation list is also handed over via `recommended`.
    summary_text = summary_model_wrapper.summarize_text(
        full_input_text,
        topic=GEN_CACHE.last_topic,
        level=GEN_CACHE.last_level,
        recommended=recs,
        max_new_tokens=300,
    )

    cleaned_resources = []
    for rec in recs:
        cleaned_resources.append({"type": rec["type"], "url": rec["url"]})

    return {
        "topic": GEN_CACHE.last_topic,
        "summary": summary_text,
        "resources": cleaned_resources,
    }
222
+
223
+
224
+
225
+
226
+
227
+
228
+
229
@app.post("/admin/build_index")
async def build_index(force: bool = False):
    """Build the recommender index and report how many entries it holds.

    `force` is forwarded to Recommender.build_index; when it is false an index
    that is already in memory is left as it is.
    """
    recommender.build_index(force=force)
    indexed_count = len(recommender.meta)
    return {"indexed": indexed_count}
233
+
234
+
235
+
236
if __name__ == "__main__":
    # Direct-execution entry point: serve the app on all interfaces, port 7860.
    uvicorn.run("app.main:app", host="0.0.0.0", port=7860)
app/model.py ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from threading import Thread
3
+ from typing import Generator, Dict, Any, List
4
+ import torch
5
+ from transformers import (
6
+ AutoTokenizer,
7
+ AutoModelForCausalLM,
8
+ TextIteratorStreamer
9
+ )
10
+
11
logger = logging.getLogger("plutus.model")
logging.basicConfig(level=logging.INFO)

# Default checkpoint identifiers used by the two model wrappers in this module.
MAIN_MODEL_NAME = "Remostart/Plutus_Tutor_model"
SUMMARY_MODEL_NAME = "Qwen/Qwen3-4B-Instruct-2507"
17
+
18
+
19
+
20
class PlutusModel:
    """
    Handles the main learning model:
    - Teaching prompt
    - Synchronous generation
    - Streaming generation
    - Explaining recommendations
    """

    def __init__(self, model_name: str = MAIN_MODEL_NAME):
        """Select the device and load the tokenizer and weights immediately.

        Args:
            model_name: Checkpoint identifier passed to `from_pretrained`.

        Raises:
            RuntimeError: If the tokenizer or the model cannot be loaded.
        """
        self.model_name = model_name
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        logger.info(f"[INIT] Main model running on: {self.device}")

        self.tokenizer = None
        self.model = None
        self._load()

    def _load(self):
        """Loads the main teaching model and tokenizer."""
        try:
            logger.info(f"[LOAD] Loading tokenizer: {self.model_name}")
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, use_fast=True)

            # Half precision is requested only when running on CUDA.
            kwargs = {"torch_dtype": torch.float16} if self.device == "cuda" else {}

            logger.info("[LOAD] Loading main model weights...")
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                device_map="auto" if self.device == "cuda" else None,
                low_cpu_mem_usage=True,
                **kwargs
            )

            self.model.eval()
            logger.info("[READY] Main model successfully loaded.")

        except Exception as e:
            logger.exception("Main model loading failed")
            # Chain the cause so the original traceback is preserved.
            raise RuntimeError(f"Main model loading failed: {e}") from e

    def create_prompt(
        self,
        personality: str,
        level: str,
        topic: str,
        extra_context: str = None
    ) -> str:
        """Build the teaching prompt for one learner and topic.

        Args:
            personality: Learner personality, inserted verbatim.
            level: Learner skill level, inserted verbatim.
            topic: Topic to teach, inserted verbatim.
            extra_context: Optional text added under "Additional Context";
                omitted when empty or None.

        Returns:
            The complete prompt, ending with "Assistant:".
        """
        prompt = (
            f"You are PlutusTutor — the best expert in Cardano's Plutus smart contract ecosystem.\n\n"
            f"User Info:\n"
            f"- Personality: {personality}\n"
            f"- Level: {level}\n"
            f"- Topic: {topic}\n\n"
            "Your task:\n"
            "- Teach with extreme clarity.\n"
            "- Give structured explanations.\n"
            "- Include examples and, where needed, code.\n"
            "- Avoid useless filler.\n"
            "- Adapt tone slightly to user personality.\n\n"
        )

        if extra_context:
            prompt += f"Additional Context:\n{extra_context}\n\n"

        prompt += "Begin teaching now.\n\nAssistant:"

        return prompt

    def _generation_kwargs(
        self,
        max_new_tokens: int,
        temperature: float,
        top_p: float
    ) -> Dict[str, Any]:
        """Assemble the keyword arguments shared by both generation paths."""
        # Some tokenizers define no pad token; reuse EOS so generate() always
        # receives an explicit pad id.
        pad_token_id = self.tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = self.tokenizer.eos_token_id

        gen_kwargs: Dict[str, Any] = {
            "max_new_tokens": max_new_tokens,
            "eos_token_id": self.tokenizer.eos_token_id,
            "pad_token_id": pad_token_id,
        }

        if temperature is not None and temperature > 0:
            gen_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
        else:
            # Sampling needs a strictly positive temperature; treat zero or
            # less as a request for deterministic (greedy) decoding.
            gen_kwargs["do_sample"] = False

        return gen_kwargs

    def generate(
        self,
        prompt: str,
        max_new_tokens: int = 700,
        temperature: float = 0.4,
        top_p: float = 0.5
    ) -> str:
        """Generate a completion for `prompt` and return only the new text.

        Args:
            prompt: Full prompt text.
            max_new_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature; values <= 0 select greedy decoding.
            top_p: Nucleus-sampling threshold used when sampling.

        Returns:
            The generated text with the prompt removed and surrounding
            whitespace stripped. On any failure the error is logged and a
            string of the form "[Generation Error] ..." is returned instead of
            raising.
        """
        try:
            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

            outputs = self.model.generate(
                **inputs,
                **self._generation_kwargs(max_new_tokens, temperature, top_p)
            )

            # The returned sequence begins with the prompt tokens. Drop them by
            # position instead of comparing decoded text with the prompt, which
            # fails whenever decoding does not reproduce the prompt exactly.
            prompt_length = inputs["input_ids"].shape[1]
            new_tokens = outputs[0][prompt_length:]

            return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

        except Exception as e:
            logger.exception("Generation failed")
            return f"[Generation Error] {e}"

    def stream_generate(
        self,
        prompt: str,
        max_new_tokens: int = 400,
        temperature: float = 0.4,
        top_p: float = 0.5
    ) -> Generator[str, None, None]:
        """Generate a completion and yield it incrementally.

        Generation runs in a background thread that feeds a
        TextIteratorStreamer; this generator consumes the streamer.

        Yields:
            The text accumulated so far (each item contains all previous
            items), not just the newest piece. On failure a single
            "[Streaming Error] ..." string is yielded.
        """
        try:
            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

            streamer = TextIteratorStreamer(
                self.tokenizer,
                skip_prompt=True,
                skip_special_tokens=True
            )

            gen_kwargs = {
                **inputs,
                "streamer": streamer,
                **self._generation_kwargs(max_new_tokens, temperature, top_p),
            }

            def _run_generation():
                try:
                    self.model.generate(**gen_kwargs)
                except Exception:
                    logger.exception("Streaming generation thread failed")
                    # Close the stream so the consumer loop below terminates
                    # instead of waiting for text that will never arrive.
                    streamer.end()

            thread = Thread(target=_run_generation)
            thread.start()

            accumulated = ""
            for chunk in streamer:
                accumulated += chunk
                yield accumulated

            # The stream is finished; wait for the worker thread to exit.
            thread.join()

        except Exception as e:
            logger.exception("Streaming failed")
            yield f"[Streaming Error] {e}"

    def summarize_recommendations(
        self,
        topic: str,
        items: List[Dict[str, Any]],
        personality: str = None,
        level: str = None,
        max_new_tokens: int = 120
    ) -> str:
        """Ask the main model to explain why the given resources suit the user.

        Args:
            topic: What the user is learning.
            items: Resource dicts; each must have "type" and "url", and may
                have "title" (the URL is shown when the title is missing).
            personality: Learner personality, inserted verbatim into the prompt.
            level: Learner skill level, inserted verbatim into the prompt.
            max_new_tokens: Upper bound on generated tokens.

        Returns:
            Whatever `generate` returns for the assembled prompt.
        """
        bullet_list = [
            f"- {item['type'].upper()}: {item.get('title') or item.get('url')} ({item['url']})"
            for item in items
        ]

        refs = "\n".join(bullet_list)

        prompt = (
            f"The user is learning: {topic}\n"
            "Here are recommended videos and documents:\n\n"
            f"{refs}\n\n"
            "Explain why these choices are perfect for the user.\n"
            f"Personality: {personality}\n"
            f"Skill Level: {level}\n"
            "Tone should be confident and friendly.\n\nAssistant:"
        )

        return self.generate(prompt, max_new_tokens=max_new_tokens)
196
+
197
+
198
+
199
+
200
+
201
class SummaryModel:
    """
    Runs the summarization LLM:
    - Summaries the full teaching text
    - Adds clarity + structure
    - Used in the /summary endpoint
    """

    def __init__(self, model_name: str = SUMMARY_MODEL_NAME):
        """Select the device and load the tokenizer and weights immediately.

        Args:
            model_name: Checkpoint identifier passed to `from_pretrained`.

        Raises:
            RuntimeError: If the tokenizer or the model cannot be loaded.
        """
        self.model_name = model_name
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        logger.info(f"[INIT] Summary model running on: {self.device}")

        self.tokenizer = None
        self.model = None
        self._load()

    def _load(self):
        """Load the summary tokenizer and model weights."""
        try:
            logger.info(f"[LOAD] Loading summary tokenizer: {self.model_name}")
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, use_fast=True)

            # Half precision is requested only when running on CUDA.
            kwargs = {"torch_dtype": torch.float16} if self.device == "cuda" else {}

            logger.info(f"[LOAD] Loading summary model: {self.model_name}")
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                device_map="auto" if self.device == "cuda" else None,
                low_cpu_mem_usage=True,
                **kwargs
            )

            self.model.eval()
            logger.info("[READY] Summary model loaded.")

        except Exception as e:
            logger.exception("Summary model loading failed")
            # Chain the cause so the original traceback is preserved.
            raise RuntimeError(f"Summary model loading failed: {e}") from e

    def summarize_text(
        self,
        full_teaching: str,
        topic: str,
        level: str,
        recommended: List[Dict[str, Any]],
        max_new_tokens: int = 400
    ) -> str:
        """Summarize teaching content and list the recommended resources.

        Args:
            full_teaching: Text to summarize; inserted verbatim into the prompt.
            topic: Topic label shown in the prompt.
            level: Learner level shown in the prompt.
            recommended: Resource dicts; each must have "type" and "url", and
                may have "title". An empty list or None is rendered as "None".
            max_new_tokens: Upper bound on generated tokens.

        Returns:
            The generated summary with the prompt removed and surrounding
            whitespace stripped. Unlike PlutusModel.generate, failures are not
            caught here and propagate to the caller.
        """
        # Format RAG references
        refs = "\n".join([
            f"- {item['type'].upper()}: {item.get('title') or item.get('url')} ({item['url']})"
            for item in recommended
        ]) if recommended else "None"

        prompt = (
            f"You are a world-class summarization assistant.\n\n"
            f"TOPIC: {topic}\n"
            f"LEVEL: {level}\n\n"
            f"Here is the full teaching content you must summarize:\n\n"
            f"{full_teaching}\n\n"
            "Now produce a clean, structured, extremely clear summary.\n"
            "After the summary, recommend these resources clearly:\n\n"
            f"{refs}\n\n"
            "Assistant:"
        )

        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        # Some tokenizers define no pad token; reuse EOS so generate() always
        # receives an explicit pad id.
        pad_token_id = self.tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = self.tokenizer.eos_token_id

        out = self.model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.2,
            top_p=0.85,
            do_sample=True,
            eos_token_id=self.tokenizer.eos_token_id,
            pad_token_id=pad_token_id
        )

        # The returned sequence begins with the prompt tokens. Drop them by
        # position instead of comparing decoded text with the prompt, which
        # fails whenever decoding does not reproduce the prompt exactly.
        prompt_length = inputs["input_ids"].shape[1]
        new_tokens = out[0][prompt_length:]

        return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
app/recommender.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import logging
4
+ from typing import List, Dict, Any
5
+ import numpy as np
6
+
7
logger = logging.getLogger("plutus.recommender")
logging.basicConfig(level=logging.INFO)

# Embedding model identifier; the EMB_MODEL_NAME environment variable overrides it.
_EMB_MODEL_NAME = os.getenv("EMB_MODEL_NAME", "sentence-transformers/all-MiniLM-L6-v2")

# Default locations of the persisted index and its metadata, both under the
# directory named by HF_HOME (or /home/user/app when HF_HOME is unset).
_CACHE_DIR = os.getenv("HF_HOME", "/home/user/app")
_INDEX_FILE = os.path.join(_CACHE_DIR, "plutus_recommend_index.faiss")
_META_FILE = os.path.join(_CACHE_DIR, "plutus_recommend_meta.json")

# Import the heavy dependencies up front, but only warn if that fails, so the
# module itself can still be imported.
try:
    from sentence_transformers import SentenceTransformer
    import faiss
except Exception:
    logger.warning(" sentence-transformers or faiss not installed. Ensure both are in requirements.txt")
23
+
24
+
25
+
26
class Recommender:
    """
    Embedding-based semantic recommender for Plutus topics.
    Loads resources from recommend.json, builds a FAISS index for fast similarity search.
    """

    def __init__(
        self,
        recommend_json_path: str,
        emb_model_name: str = _EMB_MODEL_NAME,
        index_path: str = _INDEX_FILE,
        meta_path: str = _META_FILE,
    ):
        """Load the catalogue and embedding model, then any persisted index.

        Args:
            recommend_json_path: Path of the topic -> resources JSON file.
            emb_model_name: SentenceTransformer model identifier.
            index_path: File the FAISS index is read from / written to.
            meta_path: File the index metadata is read from / written to.

        Raises:
            FileNotFoundError: If `recommend_json_path` does not exist.
            RuntimeError: If the embedding model cannot be loaded.
        """
        self.recommend_json_path = recommend_json_path
        self.emb_model_name = emb_model_name
        self.index_path = index_path
        self.meta_path = meta_path
        self.model = None
        self.index = None
        self.meta: List[Dict[str, Any]] = []
        self.topics_map: Dict[str, Any] = {}

        self._load_json()
        self._maybe_init_embedding_model()

        # Reuse a persisted index only when both of its files are present; a
        # failed load is not fatal because the index is rebuilt on first use.
        if os.path.exists(self.index_path) and os.path.exists(self.meta_path):
            try:
                self._load_index()
            except Exception:
                logger.exception("Index load failed — will rebuild on demand.")
        else:
            logger.info("No index found — will build when first used.")

    def _load_json(self):
        """Load recommend.json file."""
        if not os.path.exists(self.recommend_json_path):
            raise FileNotFoundError(f"recommend.json not found at: {self.recommend_json_path}")
        with open(self.recommend_json_path, "r", encoding="utf-8") as f:
            self.topics_map = json.load(f)
        logger.info(f"Loaded recommend.json with {len(self.topics_map)} topics.")

    def _maybe_init_embedding_model(self):
        """Load the SentenceTransformer model unless one is already set.

        Raises:
            RuntimeError: If the import or the model load fails.
        """
        if self.model is None:
            try:
                from sentence_transformers import SentenceTransformer
                self.model = SentenceTransformer(self.emb_model_name)
                logger.info(f"Loaded embedding model: {self.emb_model_name}")
            except Exception as e:
                logger.exception(f" Failed to load embedding model: {e}")
                raise RuntimeError(
                    "Embedding model not available. Please check dependencies."
                ) from e

    def build_index(self, force: bool = False):
        """
        Builds FAISS index from recommend.json.
        Each document and video becomes a searchable vector.
        Automatically saves the index and metadata to disk.

        Args:
            force: Rebuild even when an index is already held in memory.

        Raises:
            ValueError: If the catalogue contains no docs or videos.
            RuntimeError: If the FAISS index cannot be constructed.
        """
        if self.index is not None and not force:
            logger.info("Index already built; skipping rebuild.")
            return

        # Imported here, like the other methods of this class, so a missing
        # dependency surfaces as an ImportError rather than a NameError.
        import faiss

        items = []
        texts = []

        # One entry per URL: all docs of a topic first, then its videos. The
        # embedded text combines topic name, resource kind and URL.
        for topic, val in self.topics_map.items():
            for kind, key in (("doc", "docs"), ("video", "videos")):
                for url in val.get(key, []):
                    items.append({"topic": topic, "type": kind, "url": url})
                    texts.append(f"{topic} {kind} {url}")

        if not texts:
            raise ValueError("No docs/videos found in recommend.json to index.")

        logger.info(f"Encoding {len(texts)} recommendation entries...")

        emb = self.model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
        # Unit-length vectors make the inner product equal to cosine similarity.
        faiss.normalize_L2(emb)
        dim = emb.shape[1]

        try:
            index = faiss.IndexFlatIP(dim)
            index.add(emb)
        except Exception as e:
            logger.exception(f" Failed to build FAISS index: {e}")
            raise RuntimeError(f"Index build failed: {e}") from e

        self.index = index
        self.meta = items

        # Persisting is best effort: the in-memory index stays usable even
        # when the target location cannot be written.
        try:
            faiss.write_index(self.index, self.index_path)
            with open(self.meta_path, "w", encoding="utf-8") as f:
                json.dump(self.meta, f, ensure_ascii=False, indent=2)
            logger.info(f"Saved FAISS index and metadata ({len(items)} items).")
        except Exception:
            logger.warning(
                " Could not persist index — running in memory only (likely Hugging Face Space).",
                exc_info=True,
            )

    def _load_index(self):
        """Loads index and metadata files."""
        import faiss
        self.index = faiss.read_index(self.index_path)
        with open(self.meta_path, "r", encoding="utf-8") as f:
            self.meta = json.load(f)
        logger.info(f"Loaded FAISS index with {len(self.meta)} entries.")

    def recommend_for_query(self, query: str, top_k: int = 5, topic_boost: str = None) -> List[Dict[str, Any]]:
        """
        Returns top_k recommended items for `query`.
        Uses cosine similarity (via normalized inner product).

        Args:
            query: Free-text query to embed and search with.
            top_k: Maximum number of results; values below 1 yield an empty list.
            topic_boost: Optional topic name. Results whose topic matches it
                (case-insensitively) are moved ahead of the others; within
                each group results are ordered by descending score.

        Returns:
            A list of dicts with "topic", "type", "url" and "score" keys.
        """
        if top_k < 1:
            # Nothing was asked for; skip the search altogether.
            return []

        if self.index is None:
            logger.info("Index not found in memory — building now.")
            self.build_index()

        import faiss
        q_emb = self.model.encode([query], convert_to_numpy=True)
        faiss.normalize_L2(q_emb)

        # Over-fetch so that dropping duplicates can still leave top_k results.
        scores, ids = self.index.search(q_emb, top_k * 3)
        results = []
        seen = set()

        for score, idx in zip(scores[0], ids[0]):
            # Negative ids are skipped, as are ids with no metadata entry
            # (possible when the index file and the metadata file disagree).
            if idx < 0 or idx >= len(self.meta):
                continue
            meta = self.meta[int(idx)]
            key = (meta.get("url"), meta.get("type"))
            if key in seen:
                continue
            seen.add(key)
            results.append({
                "topic": meta.get("topic"),
                "type": meta.get("type"),
                "url": meta.get("url"),
                "score": float(score),
            })
            if len(results) >= top_k:
                break

        if topic_boost:
            boosted = topic_boost.lower()
            # `or ""` keeps entries without a topic from breaking the comparison.
            results.sort(
                key=lambda x: (0 if (x["topic"] or "").lower() == boosted else 1, -x["score"])
            )
        else:
            results.sort(key=lambda x: -x["score"])

        logger.info(f"Recommended {len(results)} items for query: '{query}'")
        return results
plutus_recommend_meta.json ADDED
@@ -0,0 +1,377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "topic": "What is Plutus",
4
+ "type": "doc",
5
+ "url": "https://docs.cardano.org/developer-resources/smart-contracts/plutus"
6
+ },
7
+ {
8
+ "topic": "What is Plutus",
9
+ "type": "doc",
10
+ "url": "https://plutus-community.readthedocs.io/en/latest/"
11
+ },
12
+ {
13
+ "topic": "What is Plutus",
14
+ "type": "doc",
15
+ "url": "https://iohk.io/en/blog/posts/2021/04/13/plutus-what-you-need-to-know/"
16
+ },
17
+ {
18
+ "topic": "What is Plutus",
19
+ "type": "video",
20
+ "url": "https://www.youtube.com/watch?v=igV7kMXcdpw"
21
+ },
22
+ {
23
+ "topic": "What is Plutus",
24
+ "type": "video",
25
+ "url": "https://www.youtube.com/watch?v=SvaFFSqyVwM"
26
+ },
27
+ {
28
+ "topic": "What is Plutus",
29
+ "type": "video",
30
+ "url": "https://www.youtube.com/watch?v=zUerLu_GOQs"
31
+ },
32
+ {
33
+ "topic": "Introduction to Plutus Core",
34
+ "type": "doc",
35
+ "url": "https://plutus.cardano.intersectmbo.org/docs/"
36
+ },
37
+ {
38
+ "topic": "Introduction to Plutus Core",
39
+ "type": "doc",
40
+ "url": "https://plutus.cardano.intersectmbo.org/docs/delve-deeper/languages"
41
+ },
42
+ {
43
+ "topic": "Introduction to Plutus Core",
44
+ "type": "doc",
45
+ "url": "https://github.com/IntersectMBO/plutus"
46
+ },
47
+ {
48
+ "topic": "Introduction to Plutus Core",
49
+ "type": "doc",
50
+ "url": "https://well-typed.com/blog/2022/08/plutus-cores/"
51
+ },
52
+ {
53
+ "topic": "Introduction to Plutus Core",
54
+ "type": "video",
55
+ "url": "https://blog.hachi.one/post/an-introduction-to-plutus-core/"
56
+ },
57
+ {
58
+ "topic": "Introduction to Haskell",
59
+ "type": "doc",
60
+ "url": "https://developers.cardano.org/docs/smart-contracts/plutus/"
61
+ },
62
+ {
63
+ "topic": "Introduction to Haskell",
64
+ "type": "doc",
65
+ "url": "https://developers.cardano.org/docs/get-started/haskell/onboarding/"
66
+ },
67
+ {
68
+ "topic": "Introduction to Haskell",
69
+ "type": "video",
70
+ "url": "https://www.youtube.com/watch?v=igV7kMXcdpw"
71
+ },
72
+ {
73
+ "topic": "Introduction to Haskell",
74
+ "type": "video",
75
+ "url": "https://www.emurgo.io/press-news/the-basics-of-haskell-plutus-and-marlowe-cardano/"
76
+ },
77
+ {
78
+ "topic": "Introduction to Plutus Smart Contract",
79
+ "type": "doc",
80
+ "url": "https://plutus.cardano.intersectmbo.org/docs/category/example-an-auction-smart-contract"
81
+ },
82
+ {
83
+ "topic": "Introduction to Plutus Smart Contract",
84
+ "type": "doc",
85
+ "url": "https://intersectmbo.github.io/plutus-apps/main/"
86
+ },
87
+ {
88
+ "topic": "Introduction to Plutus Smart Contract",
89
+ "type": "doc",
90
+ "url": "https://www.joinplank.com/articles/cardano-e2e-dapp-tutorial"
91
+ },
92
+ {
93
+ "topic": "Introduction to Plutus Smart Contract",
94
+ "type": "video",
95
+ "url": "https://www.youtube.com/watch?v=1vTsPQpCcTE"
96
+ },
97
+ {
98
+ "topic": "Introduction to Plutus Smart Contract",
99
+ "type": "video",
100
+ "url": "https://www.youtube.com/watch?v=wNXKiQanLTc"
101
+ },
102
+ {
103
+ "topic": "Introduction to Plutus Smart Contract",
104
+ "type": "video",
105
+ "url": "https://www.youtube.com/watch?v=Q2SaoISFgRE"
106
+ },
107
+ {
108
+ "topic": "Introduction to Plutus Smart Contract",
109
+ "type": "video",
110
+ "url": "https://www.youtube.com/watch?v=-vdn7gDiT9s"
111
+ },
112
+ {
113
+ "topic": "Introduction to eUTXO",
114
+ "type": "doc",
115
+ "url": "https://plutus-pioneer-program.readthedocs.io/en/latest/pioneer/week1.html"
116
+ },
117
+ {
118
+ "topic": "Introduction to eUTXO",
119
+ "type": "doc",
120
+ "url": "https://arxiv.org/abs/2003.14271"
121
+ },
122
+ {
123
+ "topic": "Introduction to eUTXO",
124
+ "type": "doc",
125
+ "url": "https://www.joinplank.com/articles/debugging-plutus-an-introduction-to-low-level-cardano-transactions-in-the-alonzo-era"
126
+ },
127
+ {
128
+ "topic": "Introduction to eUTXO",
129
+ "type": "doc",
130
+ "url": "https://cardanofoundation.org/academy/video/cardano-eutxo-model"
131
+ },
132
+ {
133
+ "topic": "Introduction to eUTXO",
134
+ "type": "video",
135
+ "url": "https://www.youtube.com/watch?v=igV7kMXcdpw"
136
+ },
137
+ {
138
+ "topic": "Introduction to eUTXO",
139
+ "type": "video",
140
+ "url": "https://www.youtube.com/watch?v=Q2SaoISFgRE"
141
+ },
142
+ {
143
+ "topic": "Introduction to eUTXO",
144
+ "type": "video",
145
+ "url": "https://www.youtube.com/watch?v=xiNCkegNt0M"
146
+ },
147
+ {
148
+ "topic": "Introduction to eUTXO",
149
+ "type": "video",
150
+ "url": "https://www.youtube.com/watch?v=BYT914XxqOQ"
151
+ },
152
+ {
153
+ "topic": "Introduction to Plutus Playground",
154
+ "type": "doc",
155
+ "url": "https://plutus-community.readthedocs.io/en/latest/"
156
+ },
157
+ {
158
+ "topic": "Introduction to Plutus Playground",
159
+ "type": "doc",
160
+ "url": "https://docs.cardano.org/developer-resources/smart-contracts/plutus"
161
+ },
162
+ {
163
+ "topic": "Introduction to Plutus Playground",
164
+ "type": "doc",
165
+ "url": "https://www.nmkr.io/step-by-step-guide/how-to-use-cardanos-plutus-playground"
166
+ },
167
+ {
168
+ "topic": "Introduction to Plutus Playground",
169
+ "type": "video",
170
+ "url": "https://www.youtube.com/watch?v=HtjOWAEzWL8"
171
+ },
172
+ {
173
+ "topic": "Introduction to Plutus Playground",
174
+ "type": "video",
175
+ "url": "https://www.youtube.com/playlist?list=PL53JxaGwWUqAE59_XRxhwDuN1QrBgbIav"
176
+ },
177
+ {
178
+ "topic": "Introduction to Plutus Playground",
179
+ "type": "video",
180
+ "url": "https://www.youtube.com/watch?v=DhRS-JvoCw8"
181
+ },
182
+ {
183
+ "topic": "Advantages of Plutus",
184
+ "type": "doc",
185
+ "url": "https://docs.cardano.org/developer-resources/smart-contracts/plutus"
186
+ },
187
+ {
188
+ "topic": "Advantages of Plutus",
189
+ "type": "doc",
190
+ "url": "https://www.essentialcardano.io/article/essential-resources-and-documentation-for-the-plutus-pioneer-program"
191
+ },
192
+ {
193
+ "topic": "Advantages of Plutus",
194
+ "type": "doc",
195
+ "url": "https://www.netsetsoftware.com/insights/mastering-cardano-smart-contracts-build-real-world-web-3-0-solutions-with-plutus/"
196
+ },
197
+ {
198
+ "topic": "Advantages of Plutus",
199
+ "type": "video",
200
+ "url": "https://www.youtube.com/watch?v=SvaFFSqyVwM"
201
+ },
202
+ {
203
+ "topic": "Advantages of Plutus",
204
+ "type": "video",
205
+ "url": "https://www.youtube.com/watch?v=oFgJg_DulKw"
206
+ },
207
+ {
208
+ "topic": "Advantages of Plutus",
209
+ "type": "video",
210
+ "url": "https://www.youtube.com/watch?v=pJUg_2PI4xM"
211
+ },
212
+ {
213
+ "topic": "Advantages of Plutus",
214
+ "type": "video",
215
+ "url": "https://www.antiersolutions.com/blogs/cardano-plutus-smart-contract-a-detailed-discussion/"
216
+ },
217
+ {
218
+ "topic": "Versioning",
219
+ "type": "doc",
220
+ "url": "https://plutus.cardano.intersectmbo.org/docs/working-with-scripts/ledger-language-version"
221
+ },
222
+ {
223
+ "topic": "Versioning",
224
+ "type": "doc",
225
+ "url": "https://plutus.cardano.intersectmbo.org/docs/essential-concepts/versions/"
226
+ },
227
+ {
228
+ "topic": "Versioning",
229
+ "type": "doc",
230
+ "url": "https://medium.com/tap-in-with-taptools/plutus-v3-on-testnet-5130d1e4838a"
231
+ },
232
+ {
233
+ "topic": "Versioning",
234
+ "type": "doc",
235
+ "url": "https://cexplorer.io/article/it-is-smart-to-use-plutus-v2-applications"
236
+ },
237
+ {
238
+ "topic": "Versioning",
239
+ "type": "video",
240
+ "url": "https://plutus.cardano.intersectmbo.org/docs/delve-deeper/further-resources/videos"
241
+ },
242
+ {
243
+ "topic": "Versioning",
244
+ "type": "video",
245
+ "url": "https://www.youtube.com/channel/UCcAwSpbpQDDzEDRQqcDH8Iw/playlists"
246
+ },
247
+ {
248
+ "topic": "Versioning",
249
+ "type": "video",
250
+ "url": "https://iohk.io/en/blog/posts/2024/02/12/unlocking-more-opportunities-with-plutus-v3/"
251
+ },
252
+ {
253
+ "topic": "On Chain and Off-Chain",
254
+ "type": "doc",
255
+ "url": "https://docs.cardano.org/developer-resources/smart-contracts/plutus"
256
+ },
257
+ {
258
+ "topic": "On Chain and Off-Chain",
259
+ "type": "doc",
260
+ "url": "https://www.learningcardano.com/on-chain-validator-scripts/"
261
+ },
262
+ {
263
+ "topic": "On Chain and Off-Chain",
264
+ "type": "doc",
265
+ "url": "https://projectcatalyst.io/funds/5/developer-ecosystem/step-by-step-guide-off-chain-code"
266
+ },
267
+ {
268
+ "topic": "On Chain and Off-Chain",
269
+ "type": "video",
270
+ "url": "https://www.youtube.com/watch?v=2MbzKzoBiak"
271
+ },
272
+ {
273
+ "topic": "On Chain and Off-Chain",
274
+ "type": "video",
275
+ "url": "https://www.youtube.com/watch?v=NM8uqILIR0I"
276
+ },
277
+ {
278
+ "topic": "Cardano CLI",
279
+ "type": "doc",
280
+ "url": "https://plutus.cardano.intersectmbo.org/docs/using-plinth/cli-plutus"
281
+ },
282
+ {
283
+ "topic": "Cardano CLI",
284
+ "type": "doc",
285
+ "url": "https://hydra.family/head-protocol/docs/how-to/commit-script-utxo"
286
+ },
287
+ {
288
+ "topic": "Cardano CLI",
289
+ "type": "doc",
290
+ "url": "https://docs.cardano.org/developer-resources/transaction-tutorials"
291
+ },
292
+ {
293
+ "topic": "Cardano CLI",
294
+ "type": "video",
295
+ "url": "https://www.youtube.com/watch?v=2MbzKzoBiak"
296
+ },
297
+ {
298
+ "topic": "Cardano CLI",
299
+ "type": "video",
300
+ "url": "https://aiken-lang.org/example--hello-world/end-to-end/cardano-cli"
301
+ },
302
+ {
303
+ "topic": "Data Types",
304
+ "type": "doc",
305
+ "url": "https://book.opshin.dev/smart_contract_tour/the_scriptcontext.html"
306
+ },
307
+ {
308
+ "topic": "Data Types",
309
+ "type": "video",
310
+ "url": "https://www.lidonation.com/en/posts/technical-overview-plutus-architecture"
311
+ },
312
+ {
313
+ "topic": "Monad",
314
+ "type": "doc",
315
+ "url": "https://www.youtube.com/watch?v=yOc1Z9zQ1zY"
316
+ },
317
+ {
318
+ "topic": "Monad",
319
+ "type": "video",
320
+ "url": "https://www.youtube.com/watch?v=NM8uqILIR0I"
321
+ },
322
+ {
323
+ "topic": "Minting Policies",
324
+ "type": "doc",
325
+ "url": "https://meshjs.dev/apis/txbuilder/minting"
326
+ },
327
+ {
328
+ "topic": "Minting Policies",
329
+ "type": "video",
330
+ "url": "https://www.youtube.com/watch?v=u1zFGKci3W4"
331
+ },
332
+ {
333
+ "topic": "State Machine",
334
+ "type": "doc",
335
+ "url": "https://learn.lovelace.academy/the-plutus-platform/state-machines/"
336
+ },
337
+ {
338
+ "topic": "State Machine",
339
+ "type": "video",
340
+ "url": "https://www.youtube.com/watch?v=pO18jrw7GkA"
341
+ },
342
+ {
343
+ "topic": "Marlowe",
344
+ "type": "doc",
345
+ "url": "https://crypto.howtoclicks.com/2023/05/24/plutus-vs-marlowe/"
346
+ },
347
+ {
348
+ "topic": "Marlowe",
349
+ "type": "video",
350
+ "url": "https://www.youtube.com/watch?v=X-bUcplFa0o"
351
+ },
352
+ {
353
+ "topic": "Cardano Tools (2025)",
354
+ "type": "doc",
355
+ "url": "https://cardano.org/developers/"
356
+ },
357
+ {
358
+ "topic": "Cardano Tools (2025)",
359
+ "type": "doc",
360
+ "url": "https://www.emurgo.io/press-news/here-are-5-useful-tools-for-cardano-developers/"
361
+ },
362
+ {
363
+ "topic": "Cardano Tools (2025)",
364
+ "type": "doc",
365
+ "url": "https://medium.com/tap-in-with-taptools/cardano-ecosystem-guide-q3-2025-af9d194020e2"
366
+ },
367
+ {
368
+ "topic": "Playgrounds - Local Setup",
369
+ "type": "doc",
370
+ "url": "https://plutuspbl.io/"
371
+ },
372
+ {
373
+ "topic": "Playgrounds - Local Setup",
374
+ "type": "video",
375
+ "url": "https://www.youtube.com/watch?v=g2F9raiGp_s"
376
+ }
377
+ ]
recommend.json ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "What is Plutus": {
3
+ "docs": [
4
+ "https://docs.cardano.org/developer-resources/smart-contracts/plutus",
5
+ "https://plutus-community.readthedocs.io/en/latest/",
6
+ "https://iohk.io/en/blog/posts/2021/04/13/plutus-what-you-need-to-know/"
7
+ ],
8
+ "videos": [
9
+ "https://www.youtube.com/watch?v=igV7kMXcdpw",
10
+ "https://www.youtube.com/watch?v=SvaFFSqyVwM",
11
+ "https://www.youtube.com/watch?v=zUerLu_GOQs"
12
+ ]
13
+ },
14
+ "Introduction to Plutus Core": {
15
+ "docs": [
16
+ "https://plutus.cardano.intersectmbo.org/docs/",
17
+ "https://plutus.cardano.intersectmbo.org/docs/delve-deeper/languages",
18
+ "https://github.com/IntersectMBO/plutus",
19
+ "https://well-typed.com/blog/2022/08/plutus-cores/"
20
+ ],
21
+ "videos": [
22
+ "https://blog.hachi.one/post/an-introduction-to-plutus-core/"
23
+ ]
24
+ },
25
+ "Introduction to Haskell": {
26
+ "docs": [
27
+ "https://developers.cardano.org/docs/smart-contracts/plutus/",
28
+ "https://developers.cardano.org/docs/get-started/haskell/onboarding/"
29
+ ],
30
+ "videos": [
31
+ "https://www.youtube.com/watch?v=igV7kMXcdpw",
32
+ "https://www.emurgo.io/press-news/the-basics-of-haskell-plutus-and-marlowe-cardano/"
33
+ ]
34
+ },
35
+ "Introduction to Plutus Smart Contract": {
36
+ "docs": [
37
+ "https://plutus.cardano.intersectmbo.org/docs/category/example-an-auction-smart-contract",
38
+ "https://intersectmbo.github.io/plutus-apps/main/",
39
+ "https://www.joinplank.com/articles/cardano-e2e-dapp-tutorial"
40
+ ],
41
+ "videos": [
42
+ "https://www.youtube.com/watch?v=1vTsPQpCcTE",
43
+ "https://www.youtube.com/watch?v=wNXKiQanLTc",
44
+ "https://www.youtube.com/watch?v=Q2SaoISFgRE",
45
+ "https://www.youtube.com/watch?v=-vdn7gDiT9s"
46
+ ]
47
+ },
48
+ "Introduction to eUTXO": {
49
+ "docs": [
50
+ "https://plutus-pioneer-program.readthedocs.io/en/latest/pioneer/week1.html",
51
+ "https://arxiv.org/abs/2003.14271",
52
+ "https://www.joinplank.com/articles/debugging-plutus-an-introduction-to-low-level-cardano-transactions-in-the-alonzo-era",
53
+ "https://cardanofoundation.org/academy/video/cardano-eutxo-model"
54
+ ],
55
+ "videos": [
56
+ "https://www.youtube.com/watch?v=igV7kMXcdpw",
57
+ "https://www.youtube.com/watch?v=Q2SaoISFgRE",
58
+ "https://www.youtube.com/watch?v=xiNCkegNt0M",
59
+ "https://www.youtube.com/watch?v=BYT914XxqOQ"
60
+ ]
61
+ },
62
+ "Introduction to Plutus Playground": {
63
+ "docs": [
64
+ "https://plutus-community.readthedocs.io/en/latest/",
65
+ "https://docs.cardano.org/developer-resources/smart-contracts/plutus",
66
+ "https://www.nmkr.io/step-by-step-guide/how-to-use-cardanos-plutus-playground"
67
+ ],
68
+ "videos": [
69
+ "https://www.youtube.com/watch?v=HtjOWAEzWL8",
70
+ "https://www.youtube.com/playlist?list=PL53JxaGwWUqAE59_XRxhwDuN1QrBgbIav",
71
+ "https://www.youtube.com/watch?v=DhRS-JvoCw8"
72
+ ]
73
+ },
74
+ "Advantages of Plutus": {
75
+ "docs": [
76
+ "https://docs.cardano.org/developer-resources/smart-contracts/plutus",
77
+ "https://www.essentialcardano.io/article/essential-resources-and-documentation-for-the-plutus-pioneer-program",
78
+ "https://www.netsetsoftware.com/insights/mastering-cardano-smart-contracts-build-real-world-web-3-0-solutions-with-plutus/"
79
+ ],
80
+ "videos": [
81
+ "https://www.youtube.com/watch?v=SvaFFSqyVwM",
82
+ "https://www.youtube.com/watch?v=oFgJg_DulKw",
83
+ "https://www.youtube.com/watch?v=pJUg_2PI4xM",
84
+ "https://www.antiersolutions.com/blogs/cardano-plutus-smart-contract-a-detailed-discussion/"
85
+ ]
86
+ },
87
+ "Versioning": {
88
+ "docs": [
89
+ "https://plutus.cardano.intersectmbo.org/docs/working-with-scripts/ledger-language-version",
90
+ "https://plutus.cardano.intersectmbo.org/docs/essential-concepts/versions/",
91
+ "https://medium.com/tap-in-with-taptools/plutus-v3-on-testnet-5130d1e4838a",
92
+ "https://cexplorer.io/article/it-is-smart-to-use-plutus-v2-applications"
93
+ ],
94
+ "videos": [
95
+ "https://plutus.cardano.intersectmbo.org/docs/delve-deeper/further-resources/videos",
96
+ "https://www.youtube.com/channel/UCcAwSpbpQDDzEDRQqcDH8Iw/playlists",
97
+ "https://iohk.io/en/blog/posts/2024/02/12/unlocking-more-opportunities-with-plutus-v3/"
98
+ ]
99
+ },
100
+ "Validation": {
101
+ "Low Level Validation": {
102
+ "docs": [
103
+ "https://iog-academy.gitbook.io/plutus-pioneers-program-fourth-cohort/lectures/lecture-02/lesson-01",
104
+ "https://plutus.cardano.intersectmbo.org/docs/essential-concepts/plinth-and-plutus-core",
105
+ "https://plutus-community.readthedocs.io/en/latest/#Plutus/Lectures/Cohort_03/Lecture_02/",
106
+ "https://iog-academy.gitbook.io/plutus-pioneers-program-fourth-cohort/lectures/lecture-02/lesson-01"
107
+ ],
108
+ "videos": [
109
+ "https://www.youtube.com/watch?v=2MbzKzoBiak",
110
+ "https://www.youtube.com/watch?v=3tcWCZV6L_w"
111
+ ]
112
+ },
113
+ "High Level Validation": {
114
+ "docs": [
115
+ "https://travishorn.github.io/ppp-notes/02-validation-scripts/03-high-level-typed-validation-scripts/",
116
+ "https://plutus.cardano.intersectmbo.org/docs/essential-concepts/plinth-and-plutus-core"
117
+ ],
118
+ "videos": [
119
+ "https://www.youtube.com/watch?v=2MbzKzoBiak",
120
+ "https://www.youtube.com/watch?v=NLZIrcXAlvU"
121
+ ]
122
+ }
123
+ },
124
+ "On Chain and Off-Chain": {
125
+ "docs": [
126
+ "https://docs.cardano.org/developer-resources/smart-contracts/plutus",
127
+ "https://www.learningcardano.com/on-chain-validator-scripts/",
128
+ "https://projectcatalyst.io/funds/5/developer-ecosystem/step-by-step-guide-off-chain-code"
129
+ ],
130
+ "videos": [
131
+ "https://www.youtube.com/watch?v=2MbzKzoBiak",
132
+ "https://www.youtube.com/watch?v=NM8uqILIR0I"
133
+ ]
134
+ },
135
+ "Cardano CLI": {
136
+ "docs": [
137
+ "https://plutus.cardano.intersectmbo.org/docs/using-plinth/cli-plutus",
138
+ "https://hydra.family/head-protocol/docs/how-to/commit-script-utxo",
139
+ "https://docs.cardano.org/developer-resources/transaction-tutorials"
140
+ ],
141
+ "videos": [
142
+ "https://www.youtube.com/watch?v=2MbzKzoBiak",
143
+ "https://aiken-lang.org/example--hello-world/end-to-end/cardano-cli"
144
+ ]
145
+ },
146
+ "Data Types": {
147
+ "docs": [
148
+ "https://book.opshin.dev/smart_contract_tour/the_scriptcontext.html"
149
+ ],
150
+ "videos": [
151
+ "https://www.lidonation.com/en/posts/technical-overview-plutus-architecture"
152
+ ]
153
+ },
154
+ "Monad": {
155
+ "docs": [
156
+ "https://www.youtube.com/watch?v=yOc1Z9zQ1zY"
157
+ ],
158
+ "videos": [
159
+ "https://www.youtube.com/watch?v=NM8uqILIR0I"
160
+ ]
161
+ },
162
+ "Minting Policies": {
163
+ "docs": [
164
+ "https://meshjs.dev/apis/txbuilder/minting"
165
+ ],
166
+ "videos": [
167
+ "https://www.youtube.com/watch?v=u1zFGKci3W4"
168
+ ]
169
+ },
170
+ "State Machine": {
171
+ "docs": [
172
+ "https://learn.lovelace.academy/the-plutus-platform/state-machines/"
173
+ ],
174
+ "videos": [
175
+ "https://www.youtube.com/watch?v=pO18jrw7GkA"
176
+ ]
177
+ },
178
+ "Marlowe": {
179
+ "docs": [
180
+ "https://crypto.howtoclicks.com/2023/05/24/plutus-vs-marlowe/"
181
+ ],
182
+ "videos": [
183
+ "https://www.youtube.com/watch?v=X-bUcplFa0o"
184
+ ]
185
+ },
186
+ "Cardano Tools (2025)": {
187
+ "docs": [
188
+ "https://cardano.org/developers/",
189
+ "https://www.emurgo.io/press-news/here-are-5-useful-tools-for-cardano-developers/",
190
+ "https://medium.com/tap-in-with-taptools/cardano-ecosystem-guide-q3-2025-af9d194020e2"
191
+ ],
192
+ "videos": []
193
+ },
194
+ "Playgrounds - Local Setup": {
195
+ "docs": [
196
+ "https://plutuspbl.io/"
197
+ ],
198
+ "videos": [
199
+ "https://www.youtube.com/watch?v=g2F9raiGp_s"
200
+ ]
201
+ }
202
+ }
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ transformers
4
+ accelerate
5
+ safetensors
6
+ torch
7
+ sentence-transformers
8
+ faiss-cpu
9
+ python-docx
10
+ aiofiles
11
+ python-multipart
12
+ requests
13
+ gunicorn
tokenizer/added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
tokenizer/chat_template.jinja ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- for message in messages %}
18
+ {%- if message.content is string %}
19
+ {%- set content = message.content %}
20
+ {%- else %}
21
+ {%- set content = '' %}
22
+ {%- endif %}
23
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
24
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
25
+ {%- elif message.role == "assistant" %}
26
+ {{- '<|im_start|>' + message.role + '\n' + content }}
27
+ {%- if message.tool_calls %}
28
+ {%- for tool_call in message.tool_calls %}
29
+ {%- if (loop.first and content) or (not loop.first) %}
30
+ {{- '\n' }}
31
+ {%- endif %}
32
+ {%- if tool_call.function %}
33
+ {%- set tool_call = tool_call.function %}
34
+ {%- endif %}
35
+ {{- '<tool_call>\n{"name": "' }}
36
+ {{- tool_call.name }}
37
+ {{- '", "arguments": ' }}
38
+ {%- if tool_call.arguments is string %}
39
+ {{- tool_call.arguments }}
40
+ {%- else %}
41
+ {{- tool_call.arguments | tojson }}
42
+ {%- endif %}
43
+ {{- '}\n</tool_call>' }}
44
+ {%- endfor %}
45
+ {%- endif %}
46
+ {{- '<|im_end|>\n' }}
47
+ {%- elif message.role == "tool" %}
48
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
49
+ {{- '<|im_start|>user' }}
50
+ {%- endif %}
51
+ {{- '\n<tool_response>\n' }}
52
+ {{- content }}
53
+ {{- '\n</tool_response>' }}
54
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
55
+ {{- '<|im_end|>\n' }}
56
+ {%- endif %}
57
+ {%- endif %}
58
+ {%- endfor %}
59
+ {%- if add_generation_prompt %}
60
+ {{- '<|im_start|>assistant\n' }}
61
+ {%- endif %}
tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "model_max_length": 1010000,
235
+ "pad_token": "<|endoftext|>",
236
+ "split_special_tokens": false,
237
+ "tokenizer_class": "Qwen2Tokenizer",
238
+ "unk_token": null
239
+ }
tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff