import streamlit as st
import chromadb
import requests

# HF model to use (small + free)
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.1"
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"
API_TOKEN = st.secrets["HUGGINGFACEHUB_API_TOKEN"]

# Setup request headers for the HF Inference API
headers = {"Authorization": f"Bearer {API_TOKEN}"}

# Load the persistent Chroma DB and the collection holding our docs
chroma_client = chromadb.PersistentClient(path="chroma_store")
collection = chroma_client.get_or_create_collection(name="tech_docs")

# HF Inference API call
def query_huggingface(prompt):
    payload = {
        "inputs": prompt,
        "options": {"wait_for_model": True},
    }
    response = requests.post(API_URL, headers=headers, json=payload)

    # Check for HTTP errors before trying to parse the body
    if response.status_code != 200:
        return f"❌ HF API Error: {response.status_code} - {response.text}"

    try:
        result = response.json()
        if isinstance(result, list) and result and "generated_text" in result[0]:
            return result[0]["generated_text"]
        elif isinstance(result, dict) and "error" in result:
            return f"❌ Model Error: {result['error']}"
        else:
            return "⚠️ Unexpected model response format."
    except Exception as e:
        return f"⚠️ Failed to parse response: {e}"

# UI
st.title("💬 Ask Me Anything - Tech RAG Chatbot")
user_query = st.text_input("🔎 Ask your question:")

if user_query:
    # Retrieve the top 3 matching docs from the vector DB
    results = collection.query(query_texts=[user_query], n_results=3)
    docs = results["documents"][0] if results["documents"] else []
    context = "\n".join(docs)

    # Build the prompt around the retrieved context
    prompt = f"""Answer the question using the context below:

Context:
{context}

Question: {user_query}

Answer:"""

    # Send to the HF API
    with st.spinner("Thinking..."):
        answer = query_huggingface(prompt)

    st.markdown("### 📢 Answer:")
    st.write(answer)
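
# ---------------------------------------------------------------------------
# The app above assumes the "tech_docs" collection in ./chroma_store has
# already been populated. A minimal sketch of a one-off seeding helper is
# below; the example texts and ids are assumptions (not part of the app),
# and the function is never called at runtime — run it once manually if the
# collection is still empty. Chroma embeds the documents with its default
# embedding function when no embeddings are supplied.
# ---------------------------------------------------------------------------
def seed_tech_docs():
    """Hypothetical helper: add a few document chunks to the collection."""
    collection.add(
        documents=[
            "Streamlit is a Python framework for building data apps.",
            "ChromaDB is an embedding database used for retrieval.",
        ],
        ids=["doc-1", "doc-2"],
    )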