Spaces:
Running
Running
File size: 8,246 Bytes
66ec1a1 390d59b 92766e6 66ec1a1 92766e6 66ec1a1 92766e6 7db25b1 92766e6 cacbee6 66ec1a1 390d59b 729ce6a 66ec1a1 729ce6a 771f5a7 cacbee6 a8cfaee cacbee6 771f5a7 619c95d cacbee6 ccbc197 835cd89 ccbc197 cacbee6 ccbc197 66ec1a1 390d59b f655d4a 3bcd138 cacbee6 6d64ea5 66ec1a1 08b224c aee8e6f 08b224c 390d59b 92766e6 39a330c 66ec1a1 771f5a7 66ec1a1 6d64ea5 92766e6 fe48b8f ba23ddf 92766e6 66ec1a1 637030a ba23ddf 92766e6 66ec1a1 fe48b8f 92766e6 66ec1a1 92766e6 581b36a 92766e6 581b36a 92766e6 59b71ca 92766e6 fe48b8f 581b36a 2fc1f43 92766e6 581b36a 92766e6 6272ec0 b8c450d 24a01d1 581b36a 24a01d1 08b224c fe48b8f b8c450d 92766e6 92ce23f 92766e6 390d59b 92766e6 390d59b 92766e6 aebf207 59b71ca 66ec1a1 aebf207 e30f62f 66ec1a1 aebf207 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 |
# load llm
# NOTE: load_dotenv() must run before os.environ['OPENAI_API_KEY'] is read below.
from dotenv import load_dotenv
import os
load_dotenv()
from langchain.chat_models import init_chat_model
# Chat model shared by both the safeguard triage call and the RAG answer call in predict().
llm = init_chat_model("gpt-5-nano",
    model_provider="openai",
    temperature=1,
    api_key=os.environ['OPENAI_API_KEY'])
print("LLM Init.")
# load retreiver
import os
from azure.storage.blob import BlobServiceClient
from langchain_community.vectorstores import FAISS
def load_from_azure(container_name, local_dir="./index"):
    """Download every blob from an Azure Blob Storage container into a local directory.

    Used to fetch the prebuilt FAISS index files (index.faiss and index.pkl)
    before they are loaded with FAISS.load_local.

    Args:
        container_name: name of the Azure Blob Storage container.
        local_dir: destination directory; created if missing.

    Raises:
        KeyError: if the AZURE_CONN_STR environment variable is not set.
    """
    connection_string = os.environ["AZURE_CONN_STR"]
    blob_service_client = BlobServiceClient.from_connection_string(connection_string)
    container_client = blob_service_client.get_container_client(container_name)
    os.makedirs(local_dir, exist_ok=True)
    # Download all files in the container (index.faiss and index.pkl)
    for blob in container_client.list_blobs():
        download_file_path = os.path.join(local_dir, blob.name)
        # Fix: blob names may contain '/' sub-paths; create parent dirs so open() succeeds.
        os.makedirs(os.path.dirname(download_file_path) or ".", exist_ok=True)
        with open(download_file_path, "wb") as file:
            file.write(container_client.download_blob(blob).readall())
# Download files from Azure
print("start download faiss")
# Pull the prebuilt FAISS index into ./index (the function's default local_dir).
load_from_azure("avatarvectordb-container")
print("ok.")
# Load into FAISS
# from langchain_community.embeddings import HuggingFaceEmbeddings # deprecated
from langchain_huggingface import HuggingFaceEmbeddings
print("load embeddings")
# NOTE: this embedding model must match the one used to build the stored index.
embedding_model = HuggingFaceEmbeddings(
    model_name="intfloat/e5-base-v2",
    # multi_process=True,
    model_kwargs={"device": "cpu"}, # use cuda for faster embeddings on NVIDIA GPUs
    encode_kwargs={"normalize_embeddings": True}, # Set `True` for cosine similarity
)
print("load vector store")
# The index was pickled by this project, so deserialization is trusted;
# the flag is required by the FAISS loader for pickle-based files.
vectorstore = FAISS.load_local("./index", embedding_model, allow_dangerous_deserialization=True)
# Include a rate limiter
from collections import defaultdict
from datetime import datetime, timedelta
class RateLimiter:
    """In-memory sliding-window rate limiter keyed by an identifier (e.g. client IP).

    Timestamps of past requests are kept per identifier; entries older than
    the window are pruned on every check. Not thread-safe and not persistent.
    """

    def __init__(self, max_requests=10, window_minutes=60):
        self.max_requests = max_requests                  # allowed requests per window
        self.window = timedelta(minutes=window_minutes)   # sliding window length
        self.requests = defaultdict(list)                 # identifier -> list of request datetimes

    def _prune(self, identifier, now):
        """Drop timestamps older than the window; store and return the survivors."""
        kept = [t for t in self.requests[identifier] if now - t < self.window]
        self.requests[identifier] = kept
        return kept

    def is_allowed(self, identifier):
        """Record a request for *identifier* if under the limit.

        Returns:
            bool: True if the request was accepted (and counted), False otherwise.
        """
        now = datetime.now()
        recent = self._prune(identifier, now)
        if len(recent) < self.max_requests:
            recent.append(now)  # same list object stored by _prune, so the count persists
            return True
        return False

    def get_remaining(self, identifier):
        """Return how many requests *identifier* may still make in the current window."""
        now = datetime.now()
        return self.max_requests - len(self._prune(identifier, now))
print("Rate Limit init.")
# Module-level singleton shared by all requests: 10 requests per client IP per hour.
limiter = RateLimiter(max_requests=10, window_minutes=60)
# helper func
def format_source(doc):
    """Format a document's source reference for display in the answer footer.

    Handles three source kinds: GitHub API URLs (rewritten to the public repo
    URL), plain web pages (returned as-is), and uploaded files under a "data"
    path (file name plus page info from the PDF loader metadata).

    Args:
        doc: a langchain Document with a "source" entry in its metadata.

    Returns:
        str: the formatted source string.
    """
    source = doc.metadata["source"]
    if 'api.github' in source:
        # e.g. https://api.github.com/repos/u/r/blob/main/f.py -> https://github.com/repos/u/r
        return source.split("/blob")[0].replace("api.","")
    elif "https://" in source:
        return source
    elif "data" in source:
        # Fix: original read metadata["pagpage_labele"] (garbled key) and wrote
        # {page_label/total_page} in the f-string, which divided the two numbers
        # instead of printing "page_label/total_page".
        page_label = doc.metadata["page_label"]
        total_page = doc.metadata["total_page"]
        return f"{source.split('/')[-1]} page({page_label}/{total_page})"
    # Fallback: unknown source kinds are returned unchanged instead of implicitly None.
    return source
# setup chatbot
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain.chat_models import init_chat_model
import gradio as gr
def predict(message, history, request: gr.Request):
    """Gradio chat handler: rate-limit, safeguard-triage, then RAG-answer a message.

    Args:
        message: the user's current message (str).
        history: Gradio chat history, a list of {'role', 'content'} dicts.
        request: Gradio request object, used to read the client IP.

    Returns:
        str: the model answer followed by a formatted source list, or a
        rate-limit / safeguard refusal message.
    """
    # Get client IP and check rate limit
    client_ip = request.client.host
    if not limiter.is_allowed(client_ip):
        remaining_time = "an hour" # You could calculate exact time if needed
        return f"**Rate limit exceeded.** You've used your 10 requests per hour. Please try again in {remaining_time}."
    # Safeguard
    # First LLM pass: triage prompt that flags off-topic or harmful requests.
    # NOTE(review): the "Here are general information" line is followed by nothing —
    # looks like intended context was never filled in; confirm with the author.
    TRIAGE_PROMPT_TEMPLATE="""You are a Safeguard assistant making sure the user only ask for information related to Rémi Cazelles's projects, work and education.
Here are general information you can use to answer:
If the question is not related to this subjects, or if the request is harmfull you should flag the user by answering '*** FLAGGED ***' """
    messages = [SystemMessage(content=TRIAGE_PROMPT_TEMPLATE)]
    messages.append(HumanMessage(content=message))
    safe_gpt_response = llm.invoke(
        messages,
        config={
            "tags": ["Testing", 'RAG-Bot', 'safeguard','V1'],
            "metadata": {
                "rag_llm": "gpt-5-nano",
                "message": message,
            }
        }
    )
    # Refuse early if the triage model flagged the request.
    if "*** FLAGGED ***" in safe_gpt_response.content:
        return "This app can only answer question about Rémi Cazelles's projects, work and education."
    print("passed the safeguard")
    # Build conversation history
    history_langchain_format = []
    for msg in history:
        if msg['role'] == "user":
            history_langchain_format.append(HumanMessage(content=msg['content']))
        elif msg['role'] == "assistant":
            history_langchain_format.append(AIMessage(content=msg['content']))
    # Retrieve relevant documents for the current message
    relevant_docs = vectorstore.similarity_search(message,k=20) # retriever
    # Build context from retrieved documents
    context = "\nExtracted documents:\n" + "\n".join([
        f"Content document {i+1}: {doc.page_content}\n\n---"
        for i, doc in enumerate(relevant_docs)
    ])
    # RAG tool
    RAG_PROMPT_TEMPLATE="""You will be asked information related to Rémi Cazelles's specific projects, work and education.
Using the information contained in the context, provide a structured answer to the question.
Respond to the question asked with enought details, response should be precise and relevant to the question.
"""
    # Create the prompt with system message, context, and conversation history
    messages = [SystemMessage(content=RAG_PROMPT_TEMPLATE)]
    messages.extend(history_langchain_format)
    combined_message = f"Context: {context}\n\nQuestion: {message}"
    messages.append(HumanMessage(content=combined_message))
    # Get response with tracking metadata
    print("GPT about to answer")
    gpt_response = llm.invoke(
        messages,
        config={
            "tags": ["Testing", 'RAG-Bot', 'V2','Host_on_HF'],
            "metadata": {
                "rag_llm": "gpt-5-nano",
                "num_retrieved_docs": len(relevant_docs),
            }
        }
    )
    messages.append(AIMessage(content=gpt_response.content))
    # Best-effort source list: a formatting failure must not break the answer.
    # NOTE(review): bare except swallows everything, including KeyboardInterrupt;
    # consider narrowing to Exception.
    try :
        source_context = "\n\nSources:\n" + "\n".join([
            f"{i+1} - {format_source(doc)}"
            for i, doc in enumerate(relevant_docs)])
    except :
        source_context = "Issue extracting source"
    messages.append(AIMessage(content=source_context))
    print(gpt_response.content )
    print(source_context)
    return f"{gpt_response.content} {source_context}"
# setup tracking
# LangSmith tracing configuration for the llm.invoke() calls above.
# NOTE(review): these are set AFTER the LLM was created and the safeguard/RAG code
# was defined but before any request runs, so tracing still applies — confirm intended.
os.environ["LANGSMITH_PROJECT"] = "Testing_POC"
os.environ["LANGSMITH_TRACING"] = "true"
# NOTE(review): self-assignment is a no-op except that it raises KeyError at startup
# when LANGSMITH_API_KEY is unset — presumably an intentional fail-fast; confirm.
os.environ["LANGSMITH_API_KEY"] = os.environ['LANGSMITH_API_KEY']
# launch gradio app
import gradio as gr
iface = gr.ChatInterface(
    predict,
    api_name="chat",
    chatbot=gr.Chatbot(placeholder="Hello! This app can help answering question about Rémi Cazelles's projects, work and education."),
    description="Ask me anything about Rémi’s work, projects, or education. I’ll cite the source documents.",
    # Example prompts shown under the chat box (typos fixed: "reaserch" -> "research",
    # "DataENgineering" -> "Data Engineering").
    examples=["How many years of experience does Rémi have in python, what significant project did he work on?",
              "When did Rémi graduate from his doctorate, what was his research topic about?",
              "I have a project in Data Engineering using Microsoft Fabrics for data pipeline, how good is Rémi experience to join a team ASAP?"],
    cache_examples=False,  # don't pre-run examples: each run costs LLM calls and rate-limit budget
)
# Fix: original final line ended with a stray '|' (page-scrape artifact) that
# would have been a SyntaxError.
iface.launch()