Spaces:
Sleeping
Sleeping
Upload 10 files
Browse files- .gitignore +4 -0
- README.md +9 -10
- abc.py +7 -0
- api.py +154 -0
- auth.json +1 -0
- customProvider.js +16 -0
- metadata.json +0 -0
- pf.yaml +74 -0
- requirements.txt +7 -0
- semantic_search_local.py +177 -0
.gitignore
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
__pycache__
|
| 3 |
+
|
| 4 |
+
.vercel
|
README.md
CHANGED
|
@@ -1,11 +1,10 @@
|
|
| 1 |
-
|
| 2 |
-
title: Iitm Scraper
|
| 3 |
-
emoji: π
|
| 4 |
-
colorFrom: blue
|
| 5 |
-
colorTo: yellow
|
| 6 |
-
sdk: docker
|
| 7 |
-
pinned: false
|
| 8 |
-
license: mit
|
| 9 |
-
---
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
To get started, set your OPENAI_API_KEY environment variable, or other required keys for the providers you selected.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
Next, edit promptfooconfig.yaml.
|
| 4 |
+
|
| 5 |
+
Then run:
|
| 6 |
+
```
|
| 7 |
+
promptfoo eval
|
| 8 |
+
```
|
| 9 |
+
|
| 10 |
+
Afterwards, you can view the results by running `promptfoo view`
|
abc.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Minimal smoke-test client for the local question-answering API."""

import requests

API_URL = "http://127.0.0.1:8000/api"

payload = {"question": "What is AIPIPE?", "api_key": "your-api-key"}

resp = requests.post(API_URL, json=payload)
print(resp.text)
|
api.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
import os
from collections import defaultdict
from typing import Optional

import faiss
import numpy as np
import requests
import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
|
| 12 |
+
|
| 13 |
+
# Initialize FastAPI
|
| 14 |
+
app = FastAPI()
|
| 15 |
+
|
| 16 |
+
# --- Load Environment Variables ---
|
| 17 |
+
load_dotenv()
|
| 18 |
+
api_key = os.getenv("AIPIPE_API_KEY")
|
| 19 |
+
|
| 20 |
+
if not api_key:
|
| 21 |
+
raise RuntimeError("Missing API key in environment variables.")
|
| 22 |
+
|
| 23 |
+
# --- Load Discourse Data ---
|
| 24 |
+
try:
|
| 25 |
+
with open("data/discourse_posts.json", "r", encoding="utf-8") as f:
|
| 26 |
+
posts_data = json.load(f)
|
| 27 |
+
except FileNotFoundError:
|
| 28 |
+
raise RuntimeError("Could not find 'data/discourse_posts.json'. Ensure the file is in the correct location.")
|
| 29 |
+
|
| 30 |
+
# Group posts by topic
|
| 31 |
+
topics = defaultdict(lambda: {"topic_title": "", "posts": []})
|
| 32 |
+
for post in posts_data:
|
| 33 |
+
tid = post["topic_id"]
|
| 34 |
+
topics[tid]["posts"].append(post)
|
| 35 |
+
if "topic_title" in post:
|
| 36 |
+
topics[tid]["topic_title"] = post["topic_title"]
|
| 37 |
+
|
| 38 |
+
# Sort posts within topics by post_number
|
| 39 |
+
for topic in topics.values():
|
| 40 |
+
topic["posts"].sort(key=lambda x: x.get("post_number", 0))
|
| 41 |
+
|
| 42 |
+
# --- Embedding Setup ---
|
| 43 |
+
def normalize(v):
    """Scale vector *v* to unit L2 norm; zero vectors are returned unchanged."""
    length = np.linalg.norm(v)
    if length == 0:
        return v
    return v / length
|
| 46 |
+
|
| 47 |
+
embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
| 48 |
+
embedding_data = []
|
| 49 |
+
embeddings = []
|
| 50 |
+
|
| 51 |
+
# Process topics for FAISS
|
| 52 |
+
for tid, data in topics.items():
|
| 53 |
+
posts = data["posts"]
|
| 54 |
+
title = data["topic_title"]
|
| 55 |
+
reply_map = defaultdict(list)
|
| 56 |
+
by_number = {}
|
| 57 |
+
|
| 58 |
+
for p in posts:
|
| 59 |
+
pn = p.get("post_number")
|
| 60 |
+
if pn is not None:
|
| 61 |
+
by_number[pn] = p
|
| 62 |
+
parent = p.get("reply_to_post_number")
|
| 63 |
+
reply_map[parent].append(p)
|
| 64 |
+
|
| 65 |
+
def extract(pn):
|
| 66 |
+
collected = []
|
| 67 |
+
def dfs(n):
|
| 68 |
+
if n not in by_number:
|
| 69 |
+
return
|
| 70 |
+
p = by_number[n]
|
| 71 |
+
collected.append(p)
|
| 72 |
+
for child in reply_map.get(n, []):
|
| 73 |
+
dfs(child.get("post_number"))
|
| 74 |
+
dfs(pn)
|
| 75 |
+
return collected
|
| 76 |
+
|
| 77 |
+
roots = [p for p in posts if not p.get("reply_to_post_number")]
|
| 78 |
+
for root in roots:
|
| 79 |
+
root_num = root.get("post_number", 1)
|
| 80 |
+
thread = extract(root_num)
|
| 81 |
+
text = f"Topic: {title}\n\n" + "\n\n---\n\n".join(
|
| 82 |
+
p.get("content", "").strip() for p in thread if p.get("content")
|
| 83 |
+
)
|
| 84 |
+
emb = normalize(embedder.encode(text, convert_to_numpy=True))
|
| 85 |
+
embedding_data.append({
|
| 86 |
+
"topic_id": tid,
|
| 87 |
+
"topic_title": title,
|
| 88 |
+
"root_post_number": root_num,
|
| 89 |
+
"post_numbers": [p.get("post_number") for p in thread],
|
| 90 |
+
"combined_text": text
|
| 91 |
+
})
|
| 92 |
+
embeddings.append(emb)
|
| 93 |
+
|
| 94 |
+
# Create FAISS index
|
| 95 |
+
index = faiss.IndexFlatIP(len(embeddings[0]))
|
| 96 |
+
index.add(np.vstack(embeddings).astype("float32"))
|
| 97 |
+
|
| 98 |
+
# --- API Input Model ---
|
| 99 |
+
class QuestionInput(BaseModel):
    """Request body for the /api/ endpoint.

    Attributes:
        question: The user's question; emptiness is rejected in the handler.
        image: Optional image payload. Accepted for API compatibility but
            currently unused by the handler.
    """

    question: str
    # Explicit Optional: pydantic v2 no longer treats `str = None` as
    # implicitly optional, so a client sending image=null would fail validation.
    image: Optional[str] = None
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
# --- AIPIPE API Configuration ---
|
| 106 |
+
AIPIPE_URL = "https://your-aipipe-endpoint.com/chat/completions"
|
| 107 |
+
AIPIPE_KEY = api_key
|
| 108 |
+
|
| 109 |
+
def query_aipipe(prompt):
    """Send *prompt* to the AIPIPE chat-completions endpoint and return the parsed JSON.

    Raises:
        HTTPException(500): if the upstream API responds with a non-200 status,
            or the request fails at the transport level (connection error, timeout).
    """
    headers = {"Authorization": f"Bearer {AIPIPE_KEY}", "Content-Type": "application/json"}
    data = {"model": "gpt-4o-mini", "messages": [{"role": "user", "content": prompt}], "temperature": 0.7}

    try:
        # A timeout prevents the request handler from hanging forever when
        # the AIPIPE endpoint is unreachable (the original call had none).
        response = requests.post(AIPIPE_URL, json=data, headers=headers, timeout=30)
    except requests.RequestException as e:
        raise HTTPException(status_code=500, detail=f"AIPIPE API error: {e}")

    if response.status_code == 200:
        return response.json()
    raise HTTPException(status_code=500, detail=f"AIPIPE API error: {response.text}")
|
| 118 |
+
|
| 119 |
+
# --- API Endpoint for Answer Generation ---
|
| 120 |
+
@app.post("/api/")
|
| 121 |
+
async def answer_question(payload: QuestionInput):
|
| 122 |
+
q = payload.question
|
| 123 |
+
|
| 124 |
+
# Ensure query is valid
|
| 125 |
+
if not q:
|
| 126 |
+
raise HTTPException(status_code=400, detail="Question field cannot be empty.")
|
| 127 |
+
|
| 128 |
+
# Search FAISS Index
|
| 129 |
+
q_emb = normalize(embedder.encode(q, convert_to_numpy=True)).astype("float32")
|
| 130 |
+
D, I = index.search(np.array([q_emb]), 3)
|
| 131 |
+
|
| 132 |
+
top_results = []
|
| 133 |
+
for score, idx in zip(D[0], I[0]):
|
| 134 |
+
data = embedding_data[idx]
|
| 135 |
+
top_results.append({
|
| 136 |
+
"score": float(score),
|
| 137 |
+
"text": data["combined_text"],
|
| 138 |
+
"topic_id": data["topic_id"],
|
| 139 |
+
"url": f"https://discourse.onlinedegree.iitm.ac.in/t/{data['topic_id']}"
|
| 140 |
+
})
|
| 141 |
+
|
| 142 |
+
# Generate answer using AIPIPE
|
| 143 |
+
try:
|
| 144 |
+
answer_response = query_aipipe(q)
|
| 145 |
+
answer = answer_response.get("choices", [{}])[0].get("message", {}).get("content", "No response.")
|
| 146 |
+
except Exception as e:
|
| 147 |
+
raise HTTPException(status_code=500, detail=f"Error fetching response from AIPIPE: {str(e)}")
|
| 148 |
+
|
| 149 |
+
links = [{"url": r["url"], "text": r["text"][:120]} for r in top_results]
|
| 150 |
+
return {"answer": answer, "links": links}
|
| 151 |
+
|
| 152 |
+
# --- Run the Server ---
|
| 153 |
+
if __name__ == "__main__":
|
| 154 |
+
uvicorn.run("api:app", reload=True)
|
auth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"cookies": [{"name": "OTZ", "value": "8118346_34_34__34_", "domain": "accounts.google.com", "path": "/", "expires": 1751996758, "httpOnly": false, "secure": true, "sameSite": "Lax"}, {"name": "NID", "value": "524=pGzN3AM4nktYQ9VMLlUJRVGl_-N9oMB6VQ3y5ZzHlJMUjidFhWpx-wj6mD-1yQgvKheuVi9mm7qessL2ykyLjtbKpfTq2WCgynkS95EkkkqXMq7U75UrCWoPbvTRd6veMOI6C6pElgLmxblvDr-LVidfbiS9qSqChtwuOiYKpHWijMwoWaKiTcbanPCEmvqkNeV-rZtfov0MNPt9PNOo7EQZNz9SzosAi1lykwflQWAbaSe9d-W4R95Sbv0kDcbO-_zQ5Y8TFdvc9yH9gpQhuW2X38R8TBVvkyUMDbNhJyfYE0ojJK7lNUx251m2skHFgYuQFEPY1VxD4JbGuQD_oM3V0N9SOW-omyMI3JTL6nhTIXtiAvOjs7y9ya0O3NfcDbttscYZORgMjI-0rvkqsUiN1XxRTWOuhCC_ZB4H5O44LwdGdyr3MKsWTSMC14osSSLzKFQkNeYUOF4chQhDMDNdZnimF3CXbhFKHs4cj3O8SPfUDNThNJzbxcmd1MnhbKkSt0wrVaU9bo5Xu_S2bdKaGtOZnzv3QOgv0C6S1l2-1PZVkvIzzptNITT0ivV3wenZJjfHwNPdlmb4ICMV8UyexG-LtcDTOYReU8z1Lv6v-isvfYBASqLo_g", "domain": ".google.com", "path": "/", "expires": 1765215958.960009, "httpOnly": true, "secure": true, "sameSite": "None"}, {"name": "SID", "value": "g.a000xwjUlyyhWXqZaIMuSE3XUnz3p4-cmKnHiAoN0pT4D6NfP0u4gJFIH3mFLpSqgSCdtlzmkAACgYKAfMSARISFQHGX2MicfviDNpac-kl2qNmcIh3JhoVAUF8yKrvZgwyDZB41fASWRBTKWv30076", "domain": ".google.com", "path": "/", "expires": 1783964850.960194, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "__Secure-1PSID", "value": "g.a000xwjUlyyhWXqZaIMuSE3XUnz3p4-cmKnHiAoN0pT4D6NfP0u4SMPilUhML70XfSXFgcXh1gACgYKAc0SARISFQHGX2Mia96MxI3v2UvdIesQ37uf-BoVAUF8yKrUcNT98CQMUjngFXRwODfS0076", "domain": ".google.com", "path": "/", "expires": 1783964850.96023, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "__Secure-3PSID", "value": "g.a000xwjUlyyhWXqZaIMuSE3XUnz3p4-cmKnHiAoN0pT4D6NfP0u4JC7T22vhP_XXy2jXs53PRQACgYKAfcSARISFQHGX2MiUHTSX37QUhtQc4NSoQ1CABoVAUF8yKoFTOV_FouQBFWA5RF1_EnW0076", "domain": ".google.com", "path": "/", "expires": 1783964850.960261, "httpOnly": true, "secure": true, "sameSite": "None"}, {"name": "HSID", "value": "AgSi0p7ogPeWgCNUQ", "domain": ".google.com", "path": "/", 
"expires": 1783964850.960393, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "SSID", "value": "AiCZVzx9BdRDHVheM", "domain": ".google.com", "path": "/", "expires": 1783964850.960421, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "APISID", "value": "R0cKHrXJralSylP4/Aq8bgiMom9f5Sr5f1", "domain": ".google.com", "path": "/", "expires": 1783964850.960458, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "SAPISID", "value": "qVH55bIibZoiVmPz/AGVEBc3223LCMUWOH", "domain": ".google.com", "path": "/", "expires": 1783964850.960483, "httpOnly": false, "secure": true, "sameSite": "Lax"}, {"name": "__Secure-1PAPISID", "value": "qVH55bIibZoiVmPz/AGVEBc3223LCMUWOH", "domain": ".google.com", "path": "/", "expires": 1783964850.960525, "httpOnly": false, "secure": true, "sameSite": "Lax"}, {"name": "__Secure-3PAPISID", "value": "qVH55bIibZoiVmPz/AGVEBc3223LCMUWOH", "domain": ".google.com", "path": "/", "expires": 1783964850.960551, "httpOnly": false, "secure": true, "sameSite": "None"}, {"name": "ACCOUNT_CHOOSER", "value": "AFx_qI7GYU-n0DopkJTbsr_pk9zQ5pY0thVpNAu6s_a2pDOfodRPrPB1DaoafRhjGyD63vbwDRLJnz2ELcUE1SLgCDDXMAL12rdeRMGWP_t_yViqq6u_CSWnLrNKrMD3MDJIgCKNcDOn", "domain": "accounts.google.com", "path": "/", "expires": 1783964850.960575, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "__Host-GAPS", "value": "1:6NhIn6plsmu1S2-HryhYFnQ_UEOEqr-P85AR0m7sPJLPI1hm7eKy9JdSzqkk55fEVEcYKKanEYe8AP0AO4ovOSK--_OaEQ:S_Y5W5vYNRLpy9BM", "domain": "accounts.google.com", "path": "/", "expires": 1783964850.960603, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "LSID", "value": "s.IN|s.youtube:g.a000xwjUlyTdYTHzF80gbNT7x_ibthdc2qjxxz_h2QwjYFf7S5T8HvrhsOwZ68zOO8PJMD450QACgYKAbUSARISFQHGX2Mit6FsHavPFqVeUb-IjtvZtRoVAUF8yKrKO2C4Yj00E65ati57Kj360076", "domain": "accounts.google.com", "path": "/", "expires": 1783964851.100308, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "__Host-1PLSID", "value": 
"s.IN|s.youtube:g.a000xwjUlyTdYTHzF80gbNT7x_ibthdc2qjxxz_h2QwjYFf7S5T8TV6aAzkWzk8QWXGmDWGxugACgYKAQkSARISFQHGX2MiNB9FZAeOCpB0aeacnGyJYxoVAUF8yKqbz93N6uv5thWilVvXWR4q0076", "domain": "accounts.google.com", "path": "/", "expires": 1783964851.100508, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "__Host-3PLSID", "value": "s.IN|s.youtube:g.a000xwjUlyTdYTHzF80gbNT7x_ibthdc2qjxxz_h2QwjYFf7S5T8JJw8WRM2HuDa-udrKltuVgACgYKAaISARISFQHGX2Mi0Js4l2JXX2ipUuQ_mdnPchoVAUF8yKqNDyQG3flLgDqxLIyIi74F0076", "domain": "accounts.google.com", "path": "/", "expires": 1783964851.10057, "httpOnly": true, "secure": true, "sameSite": "None"}, {"name": "__Secure-1PSIDTS", "value": "sidts-CjEB5H03P6P3jGo43x7iUx0MEGc4ah0vRt5yvc9XT00Wr_XUEb43WeZg2WvE5FCf1OqNEAA", "domain": ".youtube.com", "path": "/", "expires": 1780940851.445885, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "__Secure-3PSIDTS", "value": "sidts-CjEB5H03P6P3jGo43x7iUx0MEGc4ah0vRt5yvc9XT00Wr_XUEb43WeZg2WvE5FCf1OqNEAA", "domain": ".youtube.com", "path": "/", "expires": 1780940851.446026, "httpOnly": true, "secure": true, "sameSite": "None"}, {"name": "HSID", "value": "AZaNjsQu3eYQM6N3L", "domain": ".youtube.com", "path": "/", "expires": 1783964851.44606, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "SSID", "value": "ADenAUJPbV1Tx51cQ", "domain": ".youtube.com", "path": "/", "expires": 1783964851.446163, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "APISID", "value": "R0cKHrXJralSylP4/Aq8bgiMom9f5Sr5f1", "domain": ".youtube.com", "path": "/", "expires": 1783964851.446187, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "SAPISID", "value": "qVH55bIibZoiVmPz/AGVEBc3223LCMUWOH", "domain": ".youtube.com", "path": "/", "expires": 1783964851.446213, "httpOnly": false, "secure": true, "sameSite": "Lax"}, {"name": "__Secure-1PAPISID", "value": "qVH55bIibZoiVmPz/AGVEBc3223LCMUWOH", "domain": ".youtube.com", "path": "/", "expires": 1783964851.446235, 
"httpOnly": false, "secure": true, "sameSite": "Lax"}, {"name": "__Secure-3PAPISID", "value": "qVH55bIibZoiVmPz/AGVEBc3223LCMUWOH", "domain": ".youtube.com", "path": "/", "expires": 1783964851.446259, "httpOnly": false, "secure": true, "sameSite": "None"}, {"name": "SID", "value": "g.a000xwjUlyyhWXqZaIMuSE3XUnz3p4-cmKnHiAoN0pT4D6NfP0u4gJFIH3mFLpSqgSCdtlzmkAACgYKAfMSARISFQHGX2MicfviDNpac-kl2qNmcIh3JhoVAUF8yKrvZgwyDZB41fASWRBTKWv30076", "domain": ".youtube.com", "path": "/", "expires": 1783964851.44628, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "__Secure-1PSID", "value": "g.a000xwjUlyyhWXqZaIMuSE3XUnz3p4-cmKnHiAoN0pT4D6NfP0u4SMPilUhML70XfSXFgcXh1gACgYKAc0SARISFQHGX2Mia96MxI3v2UvdIesQ37uf-BoVAUF8yKrUcNT98CQMUjngFXRwODfS0076", "domain": ".youtube.com", "path": "/", "expires": 1783964851.446306, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "__Secure-3PSID", "value": "g.a000xwjUlyyhWXqZaIMuSE3XUnz3p4-cmKnHiAoN0pT4D6NfP0u4JC7T22vhP_XXy2jXs53PRQACgYKAfcSARISFQHGX2MiUHTSX37QUhtQc4NSoQ1CABoVAUF8yKoFTOV_FouQBFWA5RF1_EnW0076", "domain": ".youtube.com", "path": "/", "expires": 1783964851.446333, "httpOnly": true, "secure": true, "sameSite": "None"}, {"name": "HSID", "value": "AZaNjsQu3eYQM6N3L", "domain": ".google.co.in", "path": "/", "expires": 1783964851.762236, "httpOnly": true, "secure": false, "sameSite": "Lax"}, {"name": "SSID", "value": "ADenAUJPbV1Tx51cQ", "domain": ".google.co.in", "path": "/", "expires": 1783964851.762376, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "APISID", "value": "R0cKHrXJralSylP4/Aq8bgiMom9f5Sr5f1", "domain": ".google.co.in", "path": "/", "expires": 1783964851.762444, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "SAPISID", "value": "qVH55bIibZoiVmPz/AGVEBc3223LCMUWOH", "domain": ".google.co.in", "path": "/", "expires": 1783964851.762477, "httpOnly": false, "secure": true, "sameSite": "Lax"}, {"name": "__Secure-1PAPISID", "value": "qVH55bIibZoiVmPz/AGVEBc3223LCMUWOH", 
"domain": ".google.co.in", "path": "/", "expires": 1783964851.762502, "httpOnly": false, "secure": true, "sameSite": "Lax"}, {"name": "__Secure-3PAPISID", "value": "qVH55bIibZoiVmPz/AGVEBc3223LCMUWOH", "domain": ".google.co.in", "path": "/", "expires": 1783964851.762534, "httpOnly": false, "secure": true, "sameSite": "None"}, {"name": "NID", "value": "524=Xm8tnyfiRZdPI-gz8yA_hamoGHU9acfj34QWWhuxcm_rbHeiOL1i6KjHyXk4Adun5DIsKu8N8f37OYW2XYvO6wG6Jyj2AtsNpp78vFllJoC1HHVOVRBIkXG0V21cgTjAZyl2Qcedfrwi7q1X7wVUfhIDUBD4CHF1PTm4YwjW4XAxIRWKwvxSuRClcI8DJTGl5SoIUEj0GGyx", "domain": ".google.co.in", "path": "/", "expires": 1765216051.762559, "httpOnly": true, "secure": true, "sameSite": "None"}, {"name": "SID", "value": "g.a000xwjUlyyhWXqZaIMuSE3XUnz3p4-cmKnHiAoN0pT4D6NfP0u4gJFIH3mFLpSqgSCdtlzmkAACgYKAfMSARISFQHGX2MicfviDNpac-kl2qNmcIh3JhoVAUF8yKrvZgwyDZB41fASWRBTKWv30076", "domain": ".google.co.in", "path": "/", "expires": 1783964851.762587, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "__Secure-1PSID", "value": "g.a000xwjUlyyhWXqZaIMuSE3XUnz3p4-cmKnHiAoN0pT4D6NfP0u4SMPilUhML70XfSXFgcXh1gACgYKAc0SARISFQHGX2Mia96MxI3v2UvdIesQ37uf-BoVAUF8yKrUcNT98CQMUjngFXRwODfS0076", "domain": ".google.co.in", "path": "/", "expires": 1783964851.762623, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "__Secure-3PSID", "value": "g.a000xwjUlyyhWXqZaIMuSE3XUnz3p4-cmKnHiAoN0pT4D6NfP0u4JC7T22vhP_XXy2jXs53PRQACgYKAfcSARISFQHGX2MiUHTSX37QUhtQc4NSoQ1CABoVAUF8yKoFTOV_FouQBFWA5RF1_EnW0076", "domain": ".google.co.in", "path": "/", "expires": 1783964851.762652, "httpOnly": true, "secure": true, "sameSite": "None"}, {"name": "SIDCC", "value": "AKEyXzUFCfCv1cpXJ_r1W4aMYnWqNNHWO64uf_F90cBkOzDRVQRUxPwaplWxjg_RBU_Wvx3KXw", "domain": ".google.com", "path": "/", "expires": 1780940852.107024, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "__Secure-1PSIDCC", "value": "AKEyXzWjEV-GRcLQ_FrvB4nZJ8eDSjl6fOAeVx7AOmdzpQuSX8PJGZ3z5_IR7zhR9_TUmBc-hg", "domain": 
".google.com", "path": "/", "expires": 1780940852.107167, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "__Secure-3PSIDCC", "value": "AKEyXzUVBh_9F9t2EXuTR5EhQe8lgmz7BY3W1_q6sY3MHvgOomOpkJpjXpSg1bJSJVc_zW7BTA", "domain": ".google.com", "path": "/", "expires": 1780940852.107219, "httpOnly": true, "secure": true, "sameSite": "None"}, {"name": "_t", "value": "mYXw3oiXxu4xNIUtU9E2cXmEMB08yK9D461vkL8ijLzEC41lU9SgTUkecDDbqpJwoXPZHivj5gh7dPw5Ye2Z%2FSAk2Rtt8QkXuUKUaDRwWCc4DG9j2Gp%2BHBqjFpB%2F3AyW8BcBlkzE5%2BT8E5Arot2Nu023fhbXkVzHO8GHgLctfRL0VzN2Tgh%2Fn%2F%2FYpFVrTNkufiVSP1krNgf%2BaveFark0yuTgdeF14YCVFArh24%2FAhaX7HnS74ihXzi1CejwMknjrvG3gcWCg%2F2MkeLSBXhEOoOCYJswlc7QOCN9dJjcsWFAJvLu1KJU6f%2FNXIq7vB8%2Ba--xJIIe9ITH%2F7QG26Q--8zIvN%2FMJeDdtZmPKnd8lfg%3D%3D", "domain": "discourse.onlinedegree.iitm.ac.in", "path": "/", "expires": 1754588854.026139, "httpOnly": true, "secure": true, "sameSite": "Lax"}, {"name": "_bypass_cache", "value": "true", "domain": "discourse.onlinedegree.iitm.ac.in", "path": "/", "expires": -1, "httpOnly": false, "secure": true, "sameSite": "Lax"}, {"name": "_forum_session", "value": "0wLfPer6SiQZetqZXqSbPeQkcqDMKZPZ%2BaXw19BWoHwGPMzSp0YbA%2BOGKr7QrGC7%2Fct8%2BacG4jy7BdBV0zgSiEBxHAKw8kbSmHRaYHfKWN1UyktwqWL7U%2BNhX3aBlzYWBRh2Ym5Pc7DVKllXYHmvx0w6lSXQdot490FdWo1uwLW79sVQYUIc4OUXpGal85Dc9V3kk8vUD38c69qOOd4%2BHt%2F7ABghWKtgKYmfD0Do1zTno%2FZawFW07jIuOpVIeFs15H7%2B2CzpW57m8o2%2B4FvRfxWr8A3iYjqnVKXIOVrXB6EtvL2aduVzSaJSuUTpIggxkX0UH22ymyDg6lGOX08Z%2Fof84GouSoZZNvDc8CXqeVoBH%2BLrh0cNpKD%2FCFpQEQ%3D%3D--QlOaYw03YSQJlGs4--cEB8HOMUUqgtVsgTmKSQGg%3D%3D", "domain": "discourse.onlinedegree.iitm.ac.in", "path": "/", "expires": -1, "httpOnly": true, "secure": true, "sameSite": "Lax"}], "origins": [{"origin": "https://discourse.onlinedegree.iitm.ac.in", "localStorage": [{"name": "__mbLastAjax", "value": "1749404907183"}, {"name": "discourse_push_notifications_subscribed-21879", "value": ""}, {"name": "discourse_sidebar-section-tags-collapsed", "value": 
"false"}, {"name": "discourse_desktop_notifications_focus-tracker", "value": "788e1b8dfe2b4052b12424ba130b7217"}, {"name": "discourse_sidebar-section-community-collapsed", "value": "false"}, {"name": "discourse_sidebar-section-categories-collapsed", "value": "false"}, {"name": "safeLocalStorage", "value": "true"}]}]}
|
customProvider.js
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
module.exports = class CustomAPIProvider {
|
| 2 |
+
id() {
|
| 3 |
+
return "custom-api"; // Ensure this method correctly returns the provider ID.
|
| 4 |
+
}
|
| 5 |
+
|
| 6 |
+
async callApi(prompt) {
|
| 7 |
+
const response = await fetch("http://127.0.0.1:8000/ask", {
|
| 8 |
+
method: "POST",
|
| 9 |
+
headers: { "Content-Type": "application/json" },
|
| 10 |
+
body: JSON.stringify({ question: prompt }),
|
| 11 |
+
});
|
| 12 |
+
|
| 13 |
+
const data = await response.json();
|
| 14 |
+
return { output: data.answer };
|
| 15 |
+
}
|
| 16 |
+
};
|
metadata.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pf.yaml
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
providers:
|
| 2 |
+
- id: http://127.0.0.1:8000/api
|
| 3 |
+
label: Local FastAPI
|
| 4 |
+
config:
|
| 5 |
+
url: http://127.0.0.1:8000/api
|
| 6 |
+
method: POST
|
| 7 |
+
headers:
|
| 8 |
+
Content-Type: application/json
|
| 9 |
+
body: |
|
| 10 |
+
{
|
| 11 |
+
"question": "{{prompt}}",
|
| 12 |
+
"api_key": "{{AIPIPE_API_KEY}}"
|
| 13 |
+
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
prompts:
|
| 17 |
+
- "{{prompt}}"
|
| 18 |
+
|
| 19 |
+
tests:
|
| 20 |
+
- name: Model usage confusion
|
| 21 |
+
vars:
|
| 22 |
+
prompt: >
|
| 23 |
+
The question asks to use gpt-3.5-turbo-0125 model, but the AI proxy provided by Anand sir only supports gpt-4o-mini. Should we just use gpt-4o-mini or OpenAI API for gpt-3.5 turbo?
|
| 24 |
+
assert:
|
| 25 |
+
- type: contains
|
| 26 |
+
value: gpt-4o-mini
|
| 27 |
+
|
| 28 |
+
- name: SRS + Bonus display
|
| 29 |
+
vars:
|
| 30 |
+
prompt: >
|
| 31 |
+
If a student scores 10/10 on GA4 as well as a bonus, how would it appear on the dashboard?
|
| 32 |
+
assert:
|
| 33 |
+
- type: contains
|
| 34 |
+
value: bonus mark
|
| 35 |
+
|
| 36 |
+
- name: Docker recommendation
|
| 37 |
+
vars:
|
| 38 |
+
prompt: >
|
| 39 |
+
I know Docker but have not used Podman before. Should I use Docker for this course?
|
| 40 |
+
assert:
|
| 41 |
+
- type: contains
|
| 42 |
+
value: Docker CE
|
| 43 |
+
|
| 44 |
+
- name: TDS Sep 2025 Exam Date
|
| 45 |
+
vars:
|
| 46 |
+
prompt: >
|
| 47 |
+
When is the TDS Sep 2025 end-term exam?
|
| 48 |
+
assert:
|
| 49 |
+
- type: contains
|
| 50 |
+
value: date
|
| 51 |
+
|
| 52 |
+
- name: OpenAI API key validation
|
| 53 |
+
vars:
|
| 54 |
+
prompt: >
|
| 55 |
+
I have my OpenAI API key saved in the .env file. How can I ensure it's loaded correctly?
|
| 56 |
+
assert:
|
| 57 |
+
- type: contains
|
| 58 |
+
value: os.getenv("OPENAI_API_KEY")
|
| 59 |
+
|
| 60 |
+
- name: FastAPI server issue
|
| 61 |
+
vars:
|
| 62 |
+
prompt: >
|
| 63 |
+
My FastAPI server is running, but hitting 127.0.0.1:8000 returns "Not Found." What's wrong?
|
| 64 |
+
assert:
|
| 65 |
+
- type: contains
|
| 66 |
+
value: No route defined for "/"
|
| 67 |
+
|
| 68 |
+
- name: Promptfoo response validation
|
| 69 |
+
vars:
|
| 70 |
+
prompt: >
|
| 71 |
+
How can I validate my Promptfoo configuration file?
|
| 72 |
+
assert:
|
| 73 |
+
- type: contains
|
| 74 |
+
value: promptfoo validate pf.yaml
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
sentence-transformers
transformers
torch
faiss-cpu
tqdm
# Required by api.py but previously missing:
fastapi
uvicorn
requests
python-dotenv
numpy
|
| 7 |
+
|
semantic_search_local.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# semantic_search_pipeline.py
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import numpy as np
|
| 5 |
+
from tqdm import tqdm
|
| 6 |
+
from collections import defaultdict
|
| 7 |
+
from sentence_transformers import SentenceTransformer
|
| 8 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 9 |
+
import faiss
|
| 10 |
+
|
| 11 |
+
# --- Utility functions ---
|
| 12 |
+
def clean_text(text):
    """Collapse all runs of whitespace in *text* to single spaces; '' for falsy input."""
    if not text:
        return ""
    # str.split() with no argument already drops leading/trailing whitespace.
    return " ".join(text.split())
|
| 14 |
+
|
| 15 |
+
def normalize(v):
    """Return *v* scaled to unit L2 norm (zero vectors pass through unchanged)."""
    magnitude = np.linalg.norm(v)
    return v if magnitude == 0 else v / magnitude
|
| 18 |
+
|
| 19 |
+
# --- Load posts ---
|
| 20 |
+
with open("data/discourse_posts.json", "r", encoding="utf-8") as f:
|
| 21 |
+
posts_data = json.load(f)
|
| 22 |
+
|
| 23 |
+
print(f"β
Loaded {len(posts_data)} posts")
|
| 24 |
+
|
| 25 |
+
# π§ Fix missing 'post_number'
|
| 26 |
+
grouped = defaultdict(list)
|
| 27 |
+
for post in posts_data:
|
| 28 |
+
grouped[post["topic_id"]].append(post)
|
| 29 |
+
|
| 30 |
+
for topic_id, posts in grouped.items():
|
| 31 |
+
for i, post in enumerate(posts, start=1):
|
| 32 |
+
post.setdefault("post_number", i)
|
| 33 |
+
|
| 34 |
+
# --- Group by topic_id ---
|
| 35 |
+
topics = defaultdict(lambda: {"topic_title": "", "posts": []})
|
| 36 |
+
for post in posts_data:
|
| 37 |
+
tid = post["topic_id"]
|
| 38 |
+
topics[tid]["posts"].append(post)
|
| 39 |
+
if "topic_title" in post:
|
| 40 |
+
topics[tid]["topic_title"] = post["topic_title"]
|
| 41 |
+
|
| 42 |
+
for topic in topics.values():
|
| 43 |
+
topic["posts"].sort(key=lambda x: x.get("post_number", 0))
|
| 44 |
+
|
| 45 |
+
print(f"β
Grouped into {len(topics)} topics")
|
| 46 |
+
|
| 47 |
+
# --- Embedding Model ---
|
| 48 |
+
model_name = "all-MiniLM-L6-v2" # Or "GritLM/GritLM-8x7B"
|
| 49 |
+
embedder = SentenceTransformer(model_name)
|
| 50 |
+
|
| 51 |
+
# --- Build reply tree ---
|
| 52 |
+
def build_reply_map(posts):
    """Index *posts* for thread traversal.

    Returns:
        (reply_map, posts_by_number): ``reply_map`` maps a parent post number
        (``None`` for root posts) to its direct replies in input order;
        ``posts_by_number`` maps each post's number to the post dict.
        Posts lacking a 'post_number' are skipped entirely.
    """
    children = defaultdict(list)
    by_number = {}
    for entry in posts:
        number = entry.get("post_number")
        if number is None:
            continue
        by_number[number] = entry
        children[entry.get("reply_to_post_number")].append(entry)
    return children, by_number
|
| 63 |
+
|
| 64 |
+
def extract_subthread(root_num, reply_map, posts_by_number):
    """Collect the post at *root_num* and all transitive replies, depth-first preorder."""
    thread = []
    stack = [root_num]
    while stack:
        num = stack.pop()
        post = posts_by_number.get(num)
        if post is None:
            # Unknown post numbers are silently skipped, as in the recursive form.
            continue
        thread.append(post)
        # Push children in reverse so the leftmost reply is visited first,
        # matching the original recursive traversal order exactly.
        stack.extend(child["post_number"] for child in reversed(reply_map.get(num, [])))
    return thread
|
| 75 |
+
|
| 76 |
+
# --- Embed subthreads ---
|
| 77 |
+
embedding_data = []
|
| 78 |
+
embeddings = []
|
| 79 |
+
|
| 80 |
+
print("π Building subthread embeddings...")
|
| 81 |
+
|
| 82 |
+
for tid, data in tqdm(topics.items()):
|
| 83 |
+
posts = data["posts"]
|
| 84 |
+
title = data["topic_title"]
|
| 85 |
+
reply_map, by_number = build_reply_map(posts)
|
| 86 |
+
|
| 87 |
+
root_posts = [p for p in posts if not p.get("reply_to_post_number")]
|
| 88 |
+
|
| 89 |
+
if not root_posts:
|
| 90 |
+
print(f"β οΈ No root posts found for topic ID {tid}. Skipping.")
|
| 91 |
+
continue
|
| 92 |
+
|
| 93 |
+
for root in root_posts:
|
| 94 |
+
if "post_number" not in root:
|
| 95 |
+
print(f"β οΈ Skipping root post due to missing 'post_number': {root}")
|
| 96 |
+
continue
|
| 97 |
+
root_num = root["post_number"]
|
| 98 |
+
|
| 99 |
+
subthread = extract_subthread(root_num, reply_map, by_number)
|
| 100 |
+
combined = f"Topic: {title}\n\n" + "\n\n---\n\n".join(
|
| 101 |
+
clean_text(p["content"]) for p in subthread if "content" in p
|
| 102 |
+
)
|
| 103 |
+
|
| 104 |
+
emb = embedder.encode(combined, convert_to_numpy=True)
|
| 105 |
+
emb = normalize(emb)
|
| 106 |
+
|
| 107 |
+
embedding_data.append({
|
| 108 |
+
"topic_id": tid,
|
| 109 |
+
"topic_title": title,
|
| 110 |
+
"root_post_number": root_num,
|
| 111 |
+
"post_numbers": [p["post_number"] for p in subthread if "post_number" in p],
|
| 112 |
+
"combined_text": combined
|
| 113 |
+
})
|
| 114 |
+
embeddings.append(emb)
|
| 115 |
+
|
| 116 |
+
if not embeddings:
|
| 117 |
+
print("β No embeddings were generated. Exiting.")
|
| 118 |
+
exit()
|
| 119 |
+
|
| 120 |
+
embeddings = np.vstack(embeddings).astype("float32")
|
| 121 |
+
|
| 122 |
+
# --- Build FAISS index ---
|
| 123 |
+
dim = embeddings.shape[1]
|
| 124 |
+
index = faiss.IndexFlatIP(dim)
|
| 125 |
+
index.add(embeddings)
|
| 126 |
+
|
| 127 |
+
print(f"β
Indexed {len(embedding_data)} subthreads")
|
| 128 |
+
|
| 129 |
+
# --- Semantic retrieval ---
|
| 130 |
+
def retrieve(query, top_k=5):
    """Return the *top_k* most similar subthreads to *query*.

    Each result dict carries the similarity score, topic metadata, the post
    numbers covered by the subthread, and the subthread's combined text.
    """
    # Embed and unit-normalise the query so FAISS inner product == cosine similarity.
    query_vec = normalize(embedder.encode(query, convert_to_numpy=True)).astype("float32")
    scores, indices = index.search(np.array([query_vec]), top_k)

    hits = []
    for score, pos in zip(scores[0], indices[0]):
        meta = embedding_data[pos]
        hits.append({
            "score": float(score),
            "topic_id": meta["topic_id"],
            "topic_title": meta["topic_title"],
            "root_post_number": meta["root_post_number"],
            "post_numbers": meta["post_numbers"],
            "combined_text": meta["combined_text"],
        })
    return hits
|
| 147 |
+
|
| 148 |
+
# --- QA generation using T5 ---
|
| 149 |
+
gen_model_name = "google/flan-t5-base"
|
| 150 |
+
|
| 151 |
+
tokenizer = AutoTokenizer.from_pretrained(gen_model_name)
|
| 152 |
+
qa_model = AutoModelForSeq2SeqLM.from_pretrained(gen_model_name)
|
| 153 |
+
|
| 154 |
+
def generate_answer(query, contexts, max_len=256):
    """Generate an answer to *query* grounded in the retrieved *contexts* with FLAN-T5.

    NOTE(review): max_length=4096 exceeds flan-t5-base's typical 512-token
    training length; very long inputs may degrade quality — confirm before use.
    """
    joined = "\n\n".join(contexts)
    prompt = f"Answer the question based on the following forum discussion:\n\n{joined}\n\nQuestion: {query}\nAnswer:"
    encoded = tokenizer(prompt, return_tensors="pt", max_length=4096, truncation=True)
    generated = qa_model.generate(**encoded, max_length=max_len, num_beams=5, early_stopping=True)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
|
| 160 |
+
|
| 161 |
+
# --- Run Example ---
|
| 162 |
+
if __name__ == "__main__":
|
| 163 |
+
query = "If a student scores 10/10 on GA4 as well as a bonus, how would it appear on the dashboard?"
|
| 164 |
+
|
| 165 |
+
results = retrieve(query, top_k=3)
|
| 166 |
+
|
| 167 |
+
print("\nπ Top Retrieved Threads:")
|
| 168 |
+
for i, r in enumerate(results, 1):
|
| 169 |
+
print(f"\n[{i}] Score: {r['score']:.4f}")
|
| 170 |
+
print(f"Topic Title: {r['topic_title']}")
|
| 171 |
+
print(f"Root Post #: {r['root_post_number']} | Post IDs: {r['post_numbers']}")
|
| 172 |
+
print(f"Snippet:\n{r['combined_text'][:300]}...\n")
|
| 173 |
+
|
| 174 |
+
contexts = [r["combined_text"] for r in results]
|
| 175 |
+
answer = generate_answer(query, contexts)
|
| 176 |
+
|
| 177 |
+
print("\nπ‘ Generated Answer:\n", answer)
|