disLodge committed on
Commit
ae47fec
·
verified ·
1 Parent(s): fbd2e2c
Files changed (1) hide show
  1. app.py +21 -50
app.py CHANGED
@@ -11,60 +11,17 @@ from langchain_core.documents import Document
11
  from langchain_core.prompts import ChatPromptTemplate
12
  from langchain.text_splitter import CharacterTextSplitter
13
  from huggingface_hub import InferenceClient
 
14
  from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
15
  import logging
16
  import os
17
 
18
- # logging.basicConfig(level=logging.INFO)
19
- # logger = logging.getLogger(__name__)
20
 
21
# SECURITY: never commit API tokens to source control. The previous
# hard-coded fallback token (split across two variables to evade scanners)
# was leaked by this file and must be revoked on the Hugging Face account.
# Read the token exclusively from the environment.
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not HF_TOKEN:
    raise RuntimeError(
        "HUGGINGFACEHUB_API_TOKEN is not set; export it before starting the app."
    )

# Shared Hugging Face Inference API client used by the LLM runnable below.
client = InferenceClient(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    token=HF_TOKEN,
)
29
-
30
-
31
class HuggingFaceInterferenceClientRunnable(Runnable):
    """LangChain ``Runnable`` adapter around a ``huggingface_hub.InferenceClient``.

    ``invoke`` streams a chat completion for the rendered prompt and returns
    the concatenated generated text as a plain string.
    """

    def __init__(self, client, max_tokens=512, temperature=0.7, top_p=0.95):
        # client: an InferenceClient already bound to a chat model.
        self.client = client
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.top_p = top_p

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10),
        retry=retry_if_exception_type(
            (requests.exceptions.ConnectionError, requests.exceptions.Timeout)
        ),
    )
    def invoke(self, input, config=None):
        """Run the streamed chat completion and return the full response text.

        Retries up to 3 times with exponential backoff on transient network
        failures (connection errors and timeouts) only.
        """
        # The prompt value produced upstream carries the rendered text in its
        # first message.
        prompt = input.to_messages()[0].content
        messages = [{"role": "user", "content": prompt}]

        # Fix: the original reused the name `part` for both the stream chunk
        # and each choice inside it — confusing shadowing. Also accumulate
        # tokens in a list and join once instead of quadratic `+=`.
        tokens = []
        for chunk in self.client.chat_completion(
            messages,
            max_tokens=self.max_tokens,
            stream=True,
            temperature=self.temperature,
            top_p=self.top_p,
        ):
            for choice in chunk.choices:
                token = choice.delta.content
                # delta.content may be None on role/stop frames — skip those.
                if token:
                    tokens.append(token)

        return "".join(tokens)

    def update_params(self, max_tokens, temperature, top_p):
        """Update the sampling parameters in place before the next invoke()."""
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.top_p = top_p
67
 
 
 
68
 
69
  def extract_pdf_text(url: str) -> str:
70
  response = requests.get(url)
@@ -88,7 +45,13 @@ vectorstore = Chroma.from_documents(
88
  )
89
  retriever = vectorstore.as_retriever()
90
 
91
- llm = HuggingFaceInterferenceClientRunnable(client)
 
 
 
 
 
 
92
 
93
  # After RAG chain
94
  after_rag_template = """You are a {role}. Summarize the following content for yourself and speak in terms of first person.
@@ -116,7 +79,15 @@ after_rag_chain = (
116
 
117
  def process_query(role, system_message, max_tokens, temperature, top_p):
118
 
119
- llm.update_params(max_tokens, temperature, top_p)
 
 
 
 
 
 
 
 
120
 
121
  # After RAG
122
  after_rag_result = after_rag_chain.invoke({"role": role})
 
11
  from langchain_core.prompts import ChatPromptTemplate
12
  from langchain.text_splitter import CharacterTextSplitter
13
  from huggingface_hub import InferenceClient
14
+ import time
15
  from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
16
  import logging
17
  import os
18
 
 
 
19
 
20
+ # lo = "hf_JyAJApaXhIrONPFSIo"
21
+ # ve = "wbnJbrXViYurrsvP"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
# Epoch-seconds timestamp of the most recent LLM call; used by
# process_query() below to enforce a crude 60-second rate limit.
last_call_time = 0

# SECURITY: the hard-coded fallback OpenAI key ("sk-proj-...") that used to
# live here was committed to version control — it is compromised and must be
# revoked immediately. Read the key from the environment only; there must be
# no in-source fallback secret.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
25
 
26
  def extract_pdf_text(url: str) -> str:
27
  response = requests.get(url)
 
45
  )
46
  retriever = vectorstore.as_retriever()
47
 
48
# LLM used by the RAG chains below.
# NOTE(review): `ChatOpenAI` must be imported at the top of the file
# (`from langchain_openai import ChatOpenAI`); the only import added in this
# change is `import time`, so confirm the import exists or the module will
# raise NameError at load time.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    api_key=OPENAI_API_KEY,
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
)
55
 
56
  # After RAG chain
57
  after_rag_template = """You are a {role}. Summarize the following content for yourself and speak in terms of first person.
 
79
 
80
  def process_query(role, system_message, max_tokens, temperature, top_p):
81
 
82
+ global last_call_time
+ current_time = time.time()  # fix: current_time was referenced below but never assigned
83
+ if current_time - last_call_time < 60:
84
+ wait_time = int(60 - (current_time - last_call_time))
85
+ return f"Rate limit exceeded. Please wait {wait_time} seconds before trying again."
86
+ # llm.update_params(max_tokens, temperature, top_p)
87
+ last_call_time = current_time
88
+ llm.max_tokens = max_tokens
89
+ llm.temperature = temperature
90
+ llm.top_p = top_p
91
 
92
  # After RAG
93
  after_rag_result = after_rag_chain.invoke({"role": role})