Clean up the source formatting to create direct-access links

#13
by RCaz - opened
Files changed (1) hide show
  1. app.py +23 -3
app.py CHANGED
@@ -81,6 +81,27 @@ class RateLimiter:
81
  print("Rate Limit init.")
82
  limiter = RateLimiter(max_requests=10, window_minutes=60)
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  # setup chatbot
85
  from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
86
  from langchain.chat_models import init_chat_model
@@ -176,7 +197,7 @@ def predict(message, history, request: gr.Request):
176
 
177
  try :
178
  raw_source_lines = [
179
- f"{i+1} : {doc.metadata["source"].split("/")[-1]} (page {doc.metadata['page_label']}/{doc.metadata['total_pages']})\n---"
180
  for i, doc in enumerate(relevant_docs)]
181
 
182
  seen = set()
@@ -188,8 +209,7 @@ def predict(message, history, request: gr.Request):
188
  source_context = "\nSources:" + "\n".join(unique_source_lines)
189
 
190
  except :
191
- source_context = "\nSources:" + "\n".join([
192
- f"{doc.metadata["source"]}" for i, doc in enumerate(relevant_docs)])
193
 
194
  messages.append(AIMessage(content=source_context))
195
  print(gpt_response.content )
 
81
  print("Rate Limit init.")
82
  limiter = RateLimiter(max_requests=10, window_minutes=60)
83
 
84
+ # helper func
85
+
86
def format_source(doc):
    """Format a document's source reference according to its origin.

    Handles GitHub API URLs, plain web pages, and uploaded files (PDFs
    stored under a ``data`` path).

    Args:
        doc: a langchain Document; only ``doc.metadata`` is read.
             Expects ``metadata["source"]``, and for uploaded files also
             ``metadata["page_label"]`` and ``metadata["total_pages"]``
             (the keys PyPDFLoader emits — the previous inline code used
             exactly these keys).

    Returns:
        str: the formatted source. Falls back to the raw source string
        when no rule matches (never returns None, which the caller would
        otherwise interpolate as the literal text "None").
    """
    source = doc.metadata["source"]
    if "api.github" in source:
        # GitHub API URL: strip the /blob/... path and the "api." subdomain
        # to recover the canonical repository URL.
        return source.split("/blob")[0].replace("api.", "")
    if "https://" in source:
        # Plain web page: the URL is already a direct link.
        return source
    if "data" in source:
        # Uploaded file (PDF): show just the filename plus its page position.
        # Fixed metadata keys ("pagpage_labele"/"total_page" were typos) and
        # render "page (label/total)" instead of numerically dividing them.
        page_label = doc.metadata["page_label"]
        total_pages = doc.metadata["total_pages"]
        return f"{source.split('/')[-1]} (page {page_label}/{total_pages})"
    # Unknown origin: return the raw source rather than None.
    return source
105
  # setup chatbot
106
  from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
107
  from langchain.chat_models import init_chat_model
 
197
 
198
  try :
199
  raw_source_lines = [
200
+ f"{i+1} : {format_source(doc)})\n---"
201
  for i, doc in enumerate(relevant_docs)]
202
 
203
  seen = set()
 
209
  source_context = "\nSources:" + "\n".join(unique_source_lines)
210
 
211
  except :
212
+ source_context = "Issue extracting source"
 
213
 
214
  messages.append(AIMessage(content=source_context))
215
  print(gpt_response.content )