"""Flask RAG chatbot: retrieves context from a FAISS index and answers via OpenRouter."""

import os
import pickle

import requests
from dotenv import load_dotenv
from flask import Flask, jsonify, render_template, request
from langchain.docstore.document import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

# Load environment variables from .env file
load_dotenv()

# Get API key from environment variable (HF Spaces sets this)
API_KEY = os.getenv("OPENROUTER_API_KEY")
if not API_KEY:
    raise RuntimeError("OPENROUTER_API_KEY environment variable not set")

HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json",
    "HTTP-Referer": "https://github.com/your-username/KairoAPI",  # Replace with your actual repository
    "X-Title": "KairoAPI ChatBot",
    "OpenRouter-Bypass-Key": API_KEY,  # Add bypass key to avoid prompt logging
}

# Ensure data directory exists
DATA_DIR = os.path.join(os.getcwd(), "data")
os.makedirs(DATA_DIR, exist_ok=True)

# Load or preprocess documents.
# NOTE(review): pickle is only safe here because the cache file is produced by
# this same script — never point docs_path at untrusted data.
docs_path = os.path.join(DATA_DIR, "preprocessed_docs.pkl")
if os.path.exists(docs_path):
    with open(docs_path, "rb") as f:
        documents = pickle.load(f)
else:
    with open("Chatbotcontent.txt", "r", encoding="utf-8") as file:
        content = file.read()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_text(content)
    documents = [Document(page_content=chunk) for chunk in chunks]
    with open(docs_path, "wb") as f:
        pickle.dump(documents, f)

# Initialize the embedding model (cached locally so restarts don't re-download).
print("Initializing embedding model...")
model_cache = os.path.join(DATA_DIR, "model_cache")
os.makedirs(model_cache, exist_ok=True)
embedding_model = HuggingFaceEmbeddings(
    model_name="intfloat/multilingual-e5-small",
    cache_folder=model_cache,
)
print("Embedding model initialized successfully.")

# Initialize FAISS index
print("Initializing FAISS index...")
faiss_path = os.path.join(DATA_DIR, "faiss_index")


def create_faiss_index():
    """Build a fresh FAISS index from ``documents`` and persist it to ``faiss_path``.

    Returns:
        FAISS: the newly built vector store.

    Raises:
        Exception: re-raises any error from index construction after logging it.
    """
    try:
        print("Creating new FAISS index...")
        os.makedirs(faiss_path, exist_ok=True)
        # Create and save the index
        vs = FAISS.from_documents(documents, embedding_model)
        vs.save_local(faiss_path)
        print(f"FAISS index created successfully at {faiss_path}")
        return vs
    except Exception as e:
        print(f"Error creating FAISS index: {str(e)}")
        raise


# Always create a new index in Hugging Face environment
print("Creating fresh FAISS index...")
vectorstore = create_faiss_index()
print("FAISS initialization complete.")

# System prompt
SYSTEM_PROMPT = """<|system|>Reasoning: ON. You are a helpful assistant representing our organization. Always answer in a clean, first-person tone like a knowledgeable member of the team. Avoid explaining reasoning steps, just give direct answers.<|end|>
"""

SYSTEM_MESSAGE = {"role": "system", "content": SYSTEM_PROMPT}

# Running user/assistant turns only; the system message is prepended to every
# request instead of stored here. (Previously the system prompt sat at index 0
# of this list and was silently dropped once the `[-8:]` window filled up.)
# NOTE(review): this history is shared by ALL clients of the Flask app and is
# not thread-safe — fine for a single-user demo, needs per-session storage
# (and locking) for real multi-user deployment.
conversation_history = []

# Cap stored turns so memory does not grow without bound.
MAX_HISTORY_TURNS = 16

# Flask App
app = Flask(__name__)


@app.route("/")
def index():
    """Serve the chat UI."""
    return render_template("index.html")


@app.route("/chat", methods=["POST"])
def chat():
    """Answer a user message using RAG context and the OpenRouter chat API.

    Expects JSON ``{"message": "..."}``; returns ``{"response": "..."}`` on
    success or ``{"error": "..."}`` with an appropriate HTTP status on failure.
    """
    # get_json(silent=True) avoids a 500 when the body is missing or not JSON.
    data = request.get_json(silent=True) or {}
    user_input = data.get("message")
    if not user_input:
        return jsonify({"error": "No input provided."}), 400

    # Retrieve the top-3 most similar chunks as grounding context.
    docs = vectorstore.similarity_search(user_input, k=3)
    context = "\n\n".join(doc.page_content for doc in docs)

    user_prompt = f"""Using the following context, answer the user's question in a clean and natural tone like you're part of the organization. Avoid showing reasoning steps. Be brief but informative.
{context} Question: {user_input} """

    conversation_history.append({"role": "user", "content": user_prompt})
    # Trim to the newest turns so the list can't grow forever.
    del conversation_history[:-MAX_HISTORY_TURNS]

    payload = {
        "model": "openai/gpt-3.5-turbo",  # Using a more reliable model
        "temperature": 0.7,
        "top_p": 0.95,
        "max_tokens": 1024,
        "stream": False,
        # Always include the system message, then the most recent turns —
        # previously the system prompt fell out of the [-8:] window.
        "messages": [SYSTEM_MESSAGE] + conversation_history[-8:],
        "transforms": ["no-tokens"],  # Disable token logging
    }

    try:
        # requests' json= kwarg serializes the payload and sets Content-Type.
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=HEADERS,
            json=payload,
            timeout=10,
        )
        if response.status_code == 200:
            reply = response.json()["choices"][0]["message"]["content"]
            conversation_history.append({"role": "assistant", "content": reply})
            return jsonify({"response": reply})
        else:
            return jsonify({"error": f"Error {response.status_code}: {response.text}"}), 500
    except requests.exceptions.Timeout:
        return jsonify({"error": "Request timed out (10s limit exceeded)"}), 504
    except Exception as e:
        return jsonify({"error": f"Unexpected error: {str(e)}"}), 500


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)