disLodge committed on
Commit
ae47fec
·
verified ·
1 Parent(s): fbd2e2c
Files changed (1) hide show
  1. app.py +21 -50
app.py CHANGED
@@ -11,60 +11,17 @@ from langchain_core.documents import Document
11
  from langchain_core.prompts import ChatPromptTemplate
12
  from langchain.text_splitter import CharacterTextSplitter
13
  from huggingface_hub import InferenceClient
 
14
  from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
15
  import logging
16
  import os
17
 
18
- # logging.basicConfig(level=logging.INFO)
19
- # logger = logging.getLogger(__name__)
20
 
21
# SECURITY: never commit API tokens to source control. The previous
# hard-coded fallback token (split across two variables to evade scanners)
# was leaked by this file and must be revoked on the Hugging Face account.
# Read the token exclusively from the environment.
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not HF_TOKEN:
    raise RuntimeError(
        "HUGGINGFACEHUB_API_TOKEN is not set; export it before starting the app."
    )

# Shared Hugging Face Inference API client used by the LLM runnable below.
client = InferenceClient(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    token=HF_TOKEN,
)
29
-
30
-
31
class HuggingFaceInterferenceClientRunnable(Runnable):
    """LangChain ``Runnable`` adapter around a ``huggingface_hub.InferenceClient``.

    ``invoke`` streams a chat completion for the rendered prompt and returns
    the concatenated generated text as a plain string.
    """

    def __init__(self, client, max_tokens=512, temperature=0.7, top_p=0.95):
        # client: an InferenceClient already bound to a chat model.
        self.client = client
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.top_p = top_p

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10),
        retry=retry_if_exception_type(
            (requests.exceptions.ConnectionError, requests.exceptions.Timeout)
        ),
    )
    def invoke(self, input, config=None):
        """Run the streamed chat completion and return the full response text.

        Retries up to 3 times with exponential backoff on transient network
        failures (connection errors and timeouts) only.
        """
        # The prompt value produced upstream carries the rendered text in its
        # first message.
        prompt = input.to_messages()[0].content
        messages = [{"role": "user", "content": prompt}]

        # Fix: the original reused the name `part` for both the stream chunk
        # and each choice inside it — confusing shadowing. Also accumulate
        # tokens in a list and join once instead of quadratic `+=`.
        tokens = []
        for chunk in self.client.chat_completion(
            messages,
            max_tokens=self.max_tokens,
            stream=True,
            temperature=self.temperature,
            top_p=self.top_p,
        ):
            for choice in chunk.choices:
                token = choice.delta.content
                # delta.content may be None on role/stop frames — skip those.
                if token:
                    tokens.append(token)

        return "".join(tokens)

    def update_params(self, max_tokens, temperature, top_p):
        """Update the sampling parameters in place before the next invoke()."""
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.top_p = top_p
67
 
 
 
68
 
69
  def extract_pdf_text(url: str) -> str:
70
  response = requests.get(url)
@@ -88,7 +45,13 @@ vectorstore = Chroma.from_documents(
88
  )
89
  retriever = vectorstore.as_retriever()
90
 
91
- llm = HuggingFaceInterferenceClientRunnable(client)
 
 
 
 
 
 
92
 
93
  # After RAG chain
94
  after_rag_template = """You are a {role}. Summarize the following content for yourself and speak in terms of first person.
@@ -116,7 +79,15 @@ after_rag_chain = (
116
 
117
  def process_query(role, system_message, max_tokens, temperature, top_p):
118
 
119
- llm.update_params(max_tokens, temperature, top_p)
 
 
 
 
 
 
 
 
120
 
121
  # After RAG
122
  after_rag_result = after_rag_chain.invoke({"role": role})
 
11
  from langchain_core.prompts import ChatPromptTemplate
12
  from langchain.text_splitter import CharacterTextSplitter
13
  from huggingface_hub import InferenceClient
14
+ import time
15
  from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
16
  import logging
17
  import os
18
 
 
 
19
 
20
+ # lo = "hf_JyAJApaXhIrONPFSIo"
21
+ # ve = "wbnJbrXViYurrsvP"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
# Epoch-seconds timestamp of the most recent LLM call; used by
# process_query() below to enforce a crude 60-second rate limit.
last_call_time = 0

# SECURITY: the hard-coded fallback OpenAI key ("sk-proj-...") that used to
# live here was committed to version control — it is compromised and must be
# revoked immediately. Read the key from the environment only; there must be
# no in-source fallback secret.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
25
 
26
  def extract_pdf_text(url: str) -> str:
27
  response = requests.get(url)
 
45
  )
46
  retriever = vectorstore.as_retriever()
47
 
48
# LLM used by the RAG chains below.
# NOTE(review): `ChatOpenAI` must be imported at the top of the file
# (`from langchain_openai import ChatOpenAI`); the only import added in this
# change is `import time`, so confirm the import exists or the module will
# raise NameError at load time.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    api_key=OPENAI_API_KEY,
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
)
55
 
56
  # After RAG chain
57
  after_rag_template = """You are a {role}. Summarize the following content for yourself and speak in terms of first person.
 
79
 
80
  def process_query(role, system_message, max_tokens, temperature, top_p):
81
 
82
+ global last_call_time
+ current_time = time.time()  # fix: current_time was referenced below but never assigned
83
+ if current_time - last_call_time < 60:
84
+ wait_time = int(60 - (current_time - last_call_time))
85
+ return f"Rate limit exceeded. Please wait {wait_time} seconds before trying again."
86
+ # llm.update_params(max_tokens, temperature, top_p)
87
+ last_call_time = current_time
88
+ llm.max_tokens = max_tokens
89
+ llm.temperature = temperature
90
+ llm.top_p = top_p
91
 
92
  # After RAG
93
  after_rag_result = after_rag_chain.invoke({"role": role})