File size: 4,834 Bytes
bec06d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from flask import Flask, request, jsonify
from flask_cors import CORS
import sys
import os

# Add the src/python directory to the path so we can import our modules.
# NOTE: this must run before the `src.python.*` / `embedder_wrapper` imports
# below — they resolve through these entries.
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src', 'python'))

from src.python.vector_store import VectorStore
from embedder_wrapper import SyncEmbedder
from src.python.config import OPENAI_API_KEY, QDRANT_URL, QDRANT_API_KEY, COLLECTION_NAME
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)
# CORS whitelist for the local frontend dev servers (ports 3000 / 5000).
CORS(app, origins=["http://localhost:3000", "http://localhost:5000", "http://127.0.0.1:3000", "http://127.0.0.1:5000"], supports_credentials=True, allow_headers=["Content-Type", "Authorization"])

# Initialize our components
# Fail fast: if Qdrant or the embedder cannot be reached at import time,
# re-raise so the process does not start in a half-working state.
try:
    vector_store = VectorStore()
    embedder = SyncEmbedder()
    logger.info("Service initialized successfully")
    logger.info(f"Connected to Qdrant at: {QDRANT_URL}")
except Exception as e:
    logger.error(f"Failed to initialize services: {str(e)}")
    raise

@app.route('/chat', methods=['POST'])
def chat():
    """RAG chat endpoint.

    Expects JSON: ``{"message": "<user question>"}``.

    Pipeline: embed the message, retrieve the top-5 similar documents from
    the vector store, then ask gpt-3.5-turbo to answer using that context.

    Returns JSON with keys:
        response           -- the model's answer
        sources            -- source identifiers of the retrieved documents
        scores             -- similarity scores of the retrieved documents
        retrieved_context  -- the concatenated context fed to the model

    Error responses are ``{"error": "..."}`` with status 400 (bad request)
    or 500 (embedding / retrieval / LLM failure).
    """
    try:
        # silent=True: malformed JSON yields None (-> 400 below) instead of
        # raising into the outer handler and producing a misleading 500.
        data = request.get_json(silent=True)

        if not data or 'message' not in data:
            return jsonify({'error': 'Message field is required'}), 400

        user_message = data['message']
        # Reject non-string or blank messages up front rather than spending
        # an embedding call + LLM round trip on them.
        if not isinstance(user_message, str) or not user_message.strip():
            return jsonify({'error': 'Message field is required'}), 400

        # Create embedding for the user message
        try:
            query_embedding = embedder.embed_text(user_message)
        except Exception as e:
            logger.exception("Error creating embedding")
            return jsonify({'error': f'Error processing your message: {str(e)}'}), 500

        # Search for similar documents in the vector store
        try:
            similar_docs = vector_store.search_similar(query_embedding, top_k=5)
        except Exception as e:
            logger.exception("Error searching for documents")
            return jsonify({'error': f'Error retrieving documents: {str(e)}'}), 500

        # Format the retrieved documents as context. .get() guards against a
        # hit that is missing its 'content' payload.
        context = "\n".join(doc.get('content', '') for doc in similar_docs)

        # Prepare the prompt for the LLM
        if context.strip() == "":
            # If no context is found, let the model respond without it
            prompt = f"""
            Please answer the following question. If you don't know the answer, please say so.

            Question: {user_message}
            Answer:
            """
        else:
            prompt = f"""
            Answer the question based on the context provided.
            If the answer is not in the context, say "I don't have enough information to answer that question."

            Context: {context}

            Question: {user_message}
            Answer:
            """

        # Use OpenAI API to generate the response
        try:
            from openai import OpenAI

            # NOTE(review): the client is rebuilt on every request; hoisting
            # it to module level would avoid the repeated construction.
            client = OpenAI(api_key=OPENAI_API_KEY)

            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=500
            )

            # message.content is Optional in the OpenAI SDK; guard against
            # None before calling .strip().
            bot_response = (response.choices[0].message.content or "").strip()
        except Exception as e:
            logger.exception("Error calling LLM")
            return jsonify({'error': f'Error generating response: {str(e)}'}), 500

        return jsonify({
            'response': bot_response,
            'sources': [doc.get('source', '') for doc in similar_docs],
            'scores': [doc.get('score', 0.0) for doc in similar_docs],
            'retrieved_context': context
        })

    except Exception as e:
        # Last-resort boundary handler. NOTE(review): echoing str(e) to the
        # client can leak internal details; consider a generic message.
        logger.exception("Unexpected error in chat endpoint")
        return jsonify({'error': str(e)}), 500


@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: unconditionally reports the service as healthy."""
    payload = {'status': 'healthy'}
    return jsonify(payload)


@app.route('/documents/count', methods=['GET'])
def document_count():
    """Report how many documents are stored in the vector store.

    Response: ``{"count": <int>}`` on success; ``{"error": "..."}`` with
    status 500 if the store cannot be queried.
    """
    try:
        total = vector_store.get_all_documents_count()
    except Exception as exc:
        logger.error(f"Error getting document count: {str(exc)}")
        return jsonify({'error': str(exc)}), 500
    return jsonify({'count': total})


# Application entry point.
#
# NOTE(review): the original started the development server at *import* time
# (`if __name__ != '__main__': app.run(...)`). WSGI hosts (gunicorn, the
# Hugging Face Spaces runtime) import this module to obtain `app` and serve
# it themselves; calling app.run() during import blocks the import. The dev
# server must only start when this file is executed directly.
if __name__ == '__main__':
    # Honor a PORT override (e.g. from the hosting environment), defaulting
    # to 5000 for local development.
    port = int(os.environ.get('PORT', 5000))
    app.run(debug=True, host='0.0.0.0', port=port)