File size: 2,838 Bytes
556c1d7
 
 
 
 
 
 
 
 
 
 
 
 
 
aec3e44
556c1d7
 
 
aec3e44
556c1d7
 
 
 
aec3e44
 
556c1d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aec3e44
556c1d7
 
 
 
 
 
 
 
 
 
aec3e44
556c1d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aec3e44
556c1d7
 
 
 
 
 
 
 
 
 
 
 
aec3e44
556c1d7
 
 
 
 
 
aec3e44
 
 
556c1d7
aec3e44
 
556c1d7
 
aec3e44
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import os
import subprocess
import sys

# Auto build FAISS index if not present.
if not os.path.exists("vector.index"):
    print("vector.index not found. Building embeddings...")
    # Run embeddings.py with the *current* interpreter and an argument list
    # (no shell) — safer than os.system and works when "python" is not on
    # PATH. check=False keeps the original best-effort behavior: a failed
    # build surfaces later when faiss.read_index() cannot open the file.
    subprocess.run([sys.executable, "embeddings.py"], check=False)

from flask import Flask, request, jsonify, render_template
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline

# Load environment variables from a local .env file before anything reads them.
from dotenv import load_dotenv
load_dotenv()

# Flask application instance; served by gunicorn in production (see note at
# the bottom of the file — app.run() is intentionally absent).
app = Flask(__name__)

# -------------------- HOME --------------------
@app.route("/")
def home():
    """Serve the chat front-end from templates/index.html."""
    page = render_template("index.html")
    return page


# -------------------- LOAD MODELS --------------------
# All models are loaded once at import time so every request reuses the
# same in-memory objects.
print("Loading embedding model...")
embedder = SentenceTransformer("all-MiniLM-L6-v2")

print("Loading vector database...")
index = faiss.read_index("vector.index")

# One document per line; presumably row i of this list corresponds to
# vector i in the FAISS index — confirm against embeddings.py.
with open("documents.txt", "r", encoding="utf-8") as f:
    documents = [line.strip() for line in f.readlines()]

print("Loading language model...")
# Seq2seq model that generates the final answer from the retrieved context;
# max_new_tokens caps the answer length.
qa_model = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
    max_new_tokens=120
)

# -------------------- CORE BOT --------------------
def ask_bot(question, top_k=3, similarity_threshold=1.0):
    """Answer a question via retrieval-augmented generation.

    Args:
        question: The user's question text.
        top_k: Number of nearest documents to retrieve as context.
        similarity_threshold: Maximum acceptable distance of the best
            FAISS match; above this the bot refuses to answer.

    Returns:
        A greeting reply, the generated answer, or the fixed refusal
        string "I don't know based on the given data.".
    """
    greetings = {
        "hi", "hello", "hey", "hii", "hola",
        "good morning", "good evening", "good afternoon",
    }

    if question.lower().strip() in greetings:
        return "Hi 👋 I’m your C++ assistant. What’s on your mind today?"

    # RAG flow: embed the question and find the top_k nearest documents.
    q_embedding = embedder.encode([question])
    distances, indices = index.search(np.array(q_embedding), top_k)

    # Larger distance = worse match; refuse when even the best hit is poor.
    if distances[0][0] > similarity_threshold:
        return "I don't know based on the given data."

    # FAISS pads indices with -1 when the index holds fewer than top_k
    # vectors; documents[-1] would silently inject the *last* document,
    # so keep only valid rows.
    hits = [documents[i] for i in indices[0] if 0 <= i < len(documents)]
    if not hits:
        return "I don't know based on the given data."

    context = "\n".join(hits)

    prompt = f"""
Answer the question ONLY using the context below.
If the answer is not present, reply exactly:
I don't know based on the given data.

Context:
{context}

Question:
{question}

Answer:
"""

    output = qa_model(prompt)[0]["generated_text"].strip()
    return output


# -------------------- API --------------------
@app.route("/ask", methods=["POST"])
def ask():
    """POST /ask — answer a JSON payload of the form {"question": "..."}.

    Returns:
        JSON {"answer": ...}; a prompt to supply a question when the
        body is missing, not JSON, or has an empty question.
    """
    # request.json raises / yields None on a missing or non-JSON body,
    # which would crash data.get(); get_json(silent=True) returns None
    # instead, and `or {}` keeps the lookup safe.
    data = request.get_json(silent=True) or {}
    question = data.get("question", "")

    if not question:
        return jsonify({"answer": "Please provide a question."})

    answer = ask_bot(question)
    return jsonify({"answer": answer})


# -------------------- FEEDBACK (SAFE VERSION – NO EMAIL) --------------------
@app.route("/feedback", methods=["POST"])
def feedback():
    """POST /feedback — log user feedback to stdout (email deliberately
    disabled; see comment below).

    Returns:
        JSON {"status": "success"} regardless of content.
    """
    # Tolerate a missing or non-JSON body instead of crashing on None.
    data = request.get_json(silent=True) or {}
    message = data.get("feedback", "")
    user_email = data.get("email", "Not provided")

    print("📩 New Feedback Received")
    print("From:", user_email)
    print("Message:", message)

    # For now just log feedback instead of emailing (SMTP crashes servers)
    return jsonify({"status": "success"})


# ❌ DO NOT USE app.run() — gunicorn handles the server