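"""Flask RAG server for a personal portfolio chatbot.

Answers questions about the user's background and projects by retrieving
context from a Pinecone index and generating replies with a local Ollama
model through LangChain's retrieval chain.
"""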
from flask import Flask, request, jsonify
from flask_cors import CORS
from dotenv import load_dotenv
import os
from langchain_pinecone import PineconeVectorStore
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_huggingface import HuggingFaceEmbeddings

# Load the embedding model from Hugging Face
def download_hugging_face_embeddings():
    # all-MiniLM-L6-v2 produces 384-dimensional embeddings
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    return embeddings

# Define the system prompt
system_prompt = (
    "You are an intelligent Personal Portfolio Assistant that answers questions about the user's background, work, and projects. "
    "Use the retrieved context below to provide accurate and natural responses. "
    "If the context does not contain the answer, respond with 'I'm not sure about that.' "
    "Keep your answer concise."
    "\n\n"
    "Context:\n{context}"
)
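# create_stuff_documents_chain (below) fills {context} with the retrieved
# documents; {input} carries the user's question.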


load_dotenv()

pinecone_api_key = os.environ.get("PINECONE_API_KEY")
if not pinecone_api_key:
    raise ValueError("Missing PINECONE_API_KEY in environment variables.")

app = Flask(__name__)
CORS(app)  # ✅ Allow external web app to access this Flask API

# Load embeddings
embeddings = download_hugging_face_embeddings()
index_name = "portfolio"

docsearch = PineconeVectorStore.from_existing_index(index_name=index_name, embedding=embeddings)
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 3})
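# Sanity-check sketch (assumes the "portfolio" index is already populated;
# the question below is a hypothetical example):
#   retriever.invoke("What projects has the user built?")
# returns up to 3 Document objects ranked by vector similarity.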

# Model: num_predict is Ollama's output-token cap (ChatOllama does not
# accept a max_tokens parameter)
chatModel = ChatOllama(model="gemma3:1b", temperature=0.1, num_predict=512)

# Prompt
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

question_answer_chain = create_stuff_documents_chain(chatModel, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)
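# rag_chain.invoke({"input": question}) returns a dict with "input",
# "context" (the retrieved documents), and "answer" keys.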


@app.route("/")
def index():
    return "✅ RAG server running"


@app.route("/get", methods=["POST"])
def chat():
    user_msg = request.form.get("msg") or request.json.get("msg")

    if not user_msg:
        return jsonify({"error": "No message sent"}), 400

    try:
        response = rag_chain.invoke({"input": user_msg})
        answer = response.get("answer", "Sorry, I couldn’t find an answer.")
        return jsonify({"reply": answer})
    except Exception as e:
        print("Error:", e)
        return jsonify({"reply": f"Server Error: {str(e)}"})
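# Example request (hypothetical message; server defaults to port 2025):
#   curl -X POST http://localhost:2025/get \
#        -H "Content-Type: application/json" \
#        -d '{"msg": "What projects are in the portfolio?"}'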


if __name__ == '__main__':
    port = int(os.environ.get('PORT', 2025))
    app.run(host="0.0.0.0", port=port, debug=False)