File size: 5,304 Bytes
78b6072
 
 
 
 
 
 
 
 
 
9e5f60e
78b6072
4fa55cd
a44a047
78b6072
d20d3cd
78b6072
 
9e5f60e
78b6072
 
9e5f60e
 
 
f440e38
 
d20d3cd
6f570f6
d20d3cd
 
 
78b6072
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0bd2896
78b6072
 
d4446e5
78b6072
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d20d3cd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import os
import uvicorn
import requests
import json
import numpy as np
import faiss
from dotenv import load_dotenv
from collections import defaultdict
from fastapi import FastAPI, HTTPException, Request
from pydantic import BaseModel
from langchain_nomic import NomicEmbeddings  # βœ… Using Nomic Embeddings with API Key

# TODO: redirect Nomic's cache and working directory to writable locations (no redirection is configured here yet).


# Load environment variables (python-dotenv) before the keys are read below.
load_dotenv()
api_key = os.getenv("AIPIPE_API_KEY")
nomic_api_key = os.getenv("NOMIC_API_KEY")  # Load Nomic API key

# Fail fast at import time when a required credential is missing.
if not api_key:
    raise RuntimeError("Missing AIPIPE API key in environment variables.")

if not nomic_api_key:
    raise RuntimeError("Missing Nomic API key in environment variables.")  # Ensure API key is available

# Initialize Nomic embeddings.
# NOTE(review): nomic_api_key is only checked for presence and is not passed
# to the client here — presumably NomicEmbeddings picks NOMIC_API_KEY up from
# the environment itself; confirm.
embedder = NomicEmbeddings(model="nomic-embed-text-v1.5")

# Initialize FastAPI
app = FastAPI()

# --- Load Discourse Data ---
# The post dump is read once at import time; a missing file aborts startup.
try:
    with open("data/discourse_posts.json", "r", encoding="utf-8") as f:
        posts_data = json.load(f)
except FileNotFoundError:
    raise RuntimeError("Could not find 'data/discourse_posts.json'. Ensure the file is in the correct location.")

# Bucket posts by topic id. A topic's title is taken from whichever of its
# posts carries a "topic_title" key (the last such post wins).
topics = defaultdict(lambda: {"topic_title": "", "posts": []})
for post in posts_data:
    tid = post["topic_id"]
    bucket = topics[tid]
    bucket["posts"].append(post)
    if "topic_title" in post:
        bucket["topic_title"] = post["topic_title"]

# Order each topic's posts by post_number; posts lacking one sort as 0.
for topic in topics.values():
    topic["posts"] = sorted(
        topic["posts"],
        key=lambda entry: entry.get("post_number", 0),
    )
# --- Embedding Setup ---
def normalize(v):
    """Return *v* scaled to unit Euclidean length as a NumPy array.

    Args:
        v: A sequence or array of numbers, e.g. the list of floats returned
            by an embedding call.

    Returns:
        A ``numpy.ndarray`` with an inexact (float/complex) dtype. A vector
        whose norm is zero is returned unscaled, since it has no direction
        to normalize — but still as an array, so callers can rely on array
        methods such as ``.astype`` regardless of the input.
    """
    arr = np.asarray(v)
    if not np.issubdtype(arr.dtype, np.inexact):
        # Integer/bool input would otherwise survive the zero-norm branch
        # with a non-float dtype.
        arr = arr.astype(np.float64)
    norm = np.linalg.norm(arr)
    return arr / norm if norm != 0 else arr

# Per-thread metadata (embedding_data) and the matching unit-length vectors
# (embeddings); position i in one list corresponds to position i in the other
# and to row i of the FAISS index built below.
embedding_data = []
embeddings = []

# Number of thread texts sent to the embedding service per request.
_EMBED_BATCH_SIZE = 64


def _collect_thread(root_number, by_number, reply_map):
    """Return the posts of one reply thread in depth-first (pre-order) order.

    Args:
        root_number: post_number of the post the thread starts from.
        by_number: Mapping of post_number -> post dict for the topic.
        reply_map: Mapping of parent post_number -> list of reply post dicts.

    Returns:
        A list of post dicts starting with the root, each post followed by
        its replies. Post numbers missing from ``by_number`` are skipped and
        every post number is visited at most once, so malformed reply data
        (duplicates, cycles) cannot loop forever or repeat a post.
    """
    collected = []
    seen = set()
    stack = [root_number]
    while stack:
        number = stack.pop()
        if number in seen or number not in by_number:
            continue
        seen.add(number)
        collected.append(by_number[number])
        # Push replies reversed so they are popped in their original order.
        stack.extend(
            child.get("post_number")
            for child in reversed(reply_map.get(number, []))
        )
    return collected


# Process topics for FAISS: one entry per root post and its reply subtree.
for tid, data in topics.items():
    posts = data["posts"]
    title = data["topic_title"]
    reply_map = defaultdict(list)
    by_number = {}

    for p in posts:
        pn = p.get("post_number")
        if pn is not None:
            by_number[pn] = p
        reply_map[p.get("reply_to_post_number")].append(p)

    for root in posts:
        # A post with no (or a falsy) reply_to_post_number starts a thread.
        if root.get("reply_to_post_number"):
            continue
        root_num = root.get("post_number", 1)
        thread = _collect_thread(root_num, by_number, reply_map)
        text = f"Topic: {title}\n\n" + "\n\n---\n\n".join(
            p.get("content", "").strip() for p in thread if p.get("content")
        )
        embedding_data.append({
            "topic_id": tid,
            "topic_title": title,
            "root_post_number": root_num,
            "post_numbers": [p.get("post_number") for p in thread],
            "combined_text": text,
        })

# Embed the thread texts as *documents* and in batches: embed_documents uses
# the document-side task type (queries keep using embed_query at request
# time) and avoids one network round trip per thread.
_thread_texts = [entry["combined_text"] for entry in embedding_data]
for _start in range(0, len(_thread_texts), _EMBED_BATCH_SIZE):
    _batch = _thread_texts[_start:_start + _EMBED_BATCH_SIZE]
    embeddings.extend(normalize(vec) for vec in embedder.embed_documents(_batch))

if not embeddings:
    raise RuntimeError(
        "No discourse threads were found to index; check 'data/discourse_posts.json'."
    )

# Create FAISS index. Inner product over unit-length vectors equals cosine
# similarity.
index = faiss.IndexFlatIP(len(embeddings[0]))
index.add(np.vstack(embeddings).astype("float32"))

# --- API Input Model ---
class QuestionInput(BaseModel):
    # Request body accepted by the POST /api/ endpoint.
    # question: the user's question text; it is embedded for the FAISS search
    # and sent to the chat model as the prompt.
    question: str
    #image: str = None  # Optional image input, unused here

# --- AIPIPE API Configuration ---
# Chat-completions endpoint that query_aipipe() posts to.
AIPIPE_URL = "https://aipipe.org/openrouter/v1/chat/completions"
# Bearer token sent in the Authorization header; read from AIPIPE_API_KEY in the environment.
AIPIPE_KEY = api_key

def query_aipipe(prompt, timeout=30):
    """Send *prompt* as a single user message to the AIPIPE chat-completions API.

    Args:
        prompt: Text placed in the ``content`` of the one user message.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of a 200 response.

    Raises:
        HTTPException: With status 500 when the request cannot be completed
            (connection error, timeout), when the API answers with a non-200
            status, or when the response body is not valid JSON.
    """
    headers = {"Authorization": f"Bearer {AIPIPE_KEY}", "Content-Type": "application/json"}
    payload = {
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
    }

    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled connection and would hang the calling request handler.
        response = requests.post(AIPIPE_URL, json=payload, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        raise HTTPException(status_code=500, detail=f"AIPIPE API error: {exc}") from exc

    if response.status_code != 200:
        raise HTTPException(status_code=500, detail=f"AIPIPE API error: {response.text}")

    try:
        return response.json()
    except ValueError as exc:  # body was not valid JSON
        raise HTTPException(status_code=500, detail=f"AIPIPE API error: {exc}") from exc

# --- API Endpoint for Answer Generation ---
@app.post("/api/")
async def answer_question(payload: QuestionInput):
    """Answer a question and list the most similar Discourse threads.

    Args:
        payload: Request body carrying the ``question`` text.

    Returns:
        A dict with ``answer`` (the chat model's reply, or "No response.")
        and ``links``: up to three ``{"url", "text"}`` entries for the
        nearest threads in the FAISS index, where ``text`` is the first 120
        characters of the thread text.

    Raises:
        HTTPException: 400 when the question is empty or only whitespace;
            500 when the AIPIPE call fails or its response cannot be read.
    """
    q = payload.question

    # Ensure query is valid (a whitespace-only question is as empty as "").
    if not q or not q.strip():
        raise HTTPException(status_code=400, detail="Question field cannot be empty.")

    # Search FAISS index. np.asarray guards against normalize() handing back
    # a plain list for a zero vector.
    q_emb = np.asarray(normalize(embedder.embed_query(q)), dtype="float32")
    D, I = index.search(q_emb.reshape(1, -1), 3)

    top_results = []
    for score, idx in zip(D[0], I[0]):
        if idx < 0:
            # FAISS pads the result with -1 when the index holds fewer
            # vectors than requested; indexing with it would silently pick
            # the last entry of embedding_data.
            continue
        data = embedding_data[idx]
        top_results.append({
            "score": float(score),
            "text": data["combined_text"],
            "topic_id": data["topic_id"],
            "url": f"https://discourse.onlinedegree.iitm.ac.in/t/{data['topic_id']}",
        })

    # Generate answer using AIPIPE.
    # NOTE(review): the prompt is the bare question; the retrieved threads
    # are only returned as links and are not given to the model — confirm
    # this is intended.
    try:
        answer_response = query_aipipe(q)
        choices = answer_response.get("choices") or [{}]
        answer = choices[0].get("message", {}).get("content", "No response.")
    except HTTPException:
        # Already a well-formed API error; do not wrap it a second time.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error fetching response from AIPIPE: {str(e)}") from e

    links = [{"url": r["url"], "text": r["text"][:120]} for r in top_results]
    return {"answer": answer, "links": links}

# --- Run the Server ---
# When executed as a script, start uvicorn on 0.0.0.0:8000 with reload enabled.
if __name__ == "__main__":
    # NOTE(review): "api:app" assumes this module is importable as `api`
    # (i.e. the file is named api.py) — confirm against the actual filename.
    uvicorn.run("api:app", host="0.0.0.0", port=8000, reload=True)