BtB-ExpC commited on
Commit
ca454c0
·
1 Parent(s): 38e8d65
chains/diagnoser_chain.py CHANGED
@@ -25,10 +25,8 @@ class DiagnoserChain(BaseModel):
25
  # --- Step 2: Generate a diagnosis using the standardized exercise ---
26
  prompt_diagnose = await self.template_diagnose.aformat_prompt(standardized_exercise=standardized_exercise)
27
  diagnose_messages = prompt_diagnose.to_messages()
28
- diagnosis = ""
29
- async for token in self.llm_diagnose.astream(diagnose_messages):
30
- diagnosis += token
31
- # Here you could, for example, update a UI element if you were streaming tokens to the frontend.
32
  return diagnosis
33
 
34
  class Config:
 
25
  # --- Step 2: Generate a diagnosis using the standardized exercise ---
26
  prompt_diagnose = await self.template_diagnose.aformat_prompt(standardized_exercise=standardized_exercise)
27
  diagnose_messages = prompt_diagnose.to_messages()
28
+ diagnosis = await self.llm_diagnose.ainvoke(diagnose_messages)
29
+
 
 
30
  return diagnosis
31
 
32
  class Config:
chains/distractors_chain.py CHANGED
@@ -6,10 +6,11 @@ from config.exercise_standardizer import standardize_exercise
6
 
7
 
8
  class DistractorsChain(BaseModel):
9
- llm_standardize: Any # Fixed LLM for step 1
10
  template_standardize: ChatPromptTemplate
11
- template: ChatPromptTemplate
12
- llm: Any # User-selectable LLM for step 2
 
 
13
 
14
  async def run(self, user_query: str, exercise_format: str) -> str:
15
  """
@@ -25,20 +26,9 @@ class DistractorsChain(BaseModel):
25
  # --- Step 2: Generate new distractors using the standardized exercise ---
26
  prompt_distractors = await self.template_distractors.aformat_prompt(standardized_exercise=standardized_exercise)
27
  distractors_messages = prompt_distractors.to_messages()
28
- distractors = ""
29
- async for token in self.llm_distr.astream(distractors_messages):
30
- distractors += token
31
- # Here you could, for example, update a UI element if you were streaming tokens to the frontend.
32
- return distractors
33
-
34
-
35
 
36
-
37
-
38
- prompt = await self.template.aformat_prompt(user_input=user_query)
39
- messages = prompt.to_messages()
40
- result = await self.llm.ainvoke(messages)
41
- return result
42
 
43
  class Config:
44
  arbitrary_types_allowed = True
 
6
 
7
 
8
  class DistractorsChain(BaseModel):
 
9
  template_standardize: ChatPromptTemplate
10
+ template_distractors: ChatPromptTemplate
11
+ llm_standardize: Any # Fixed LLM for step 1
12
+ llm_distr: Any # User-selectable LLM for step 2
13
+
14
 
15
  async def run(self, user_query: str, exercise_format: str) -> str:
16
  """
 
26
  # --- Step 2: Generate new distractors using the standardized exercise ---
27
  prompt_distractors = await self.template_distractors.aformat_prompt(standardized_exercise=standardized_exercise)
28
  distractors_messages = prompt_distractors.to_messages()
29
+ distractors = await self.llm_distr.ainvoke(distractors_messages)
 
 
 
 
 
 
30
 
31
+ return distractors
 
 
 
 
 
32
 
33
  class Config:
34
  arbitrary_types_allowed = True
config/exercise_standardizer.py CHANGED
@@ -22,10 +22,6 @@ async def standardize_exercise(user_query: str, exercise_format: str, template:
22
  )
23
 
24
  std_messages = prompt_std.to_messages()
25
-
26
- # Stream tokens to construct the standardized response
27
- standardized_exercise = ""
28
- async for token in llm.astream(std_messages):
29
- standardized_exercise += token
30
 
31
  return standardized_exercise
 
22
  )
23
 
24
  std_messages = prompt_std.to_messages()
25
+ standardized_exercise = await llm.ainvoke(std_messages)
 
 
 
 
26
 
27
  return standardized_exercise
utils/streaming.py DELETED
@@ -1,35 +0,0 @@
1
- # utils/streaming.py
2
- import os
3
- import asyncio
4
- from huggingface_hub import AsyncInferenceClient
5
-
6
-
7
- async def stream_chat_completion(messages, model_name: str, max_tokens: int = 1024):
8
- """
9
- Stream tokens from a Hugging Face Inference endpoint.
10
-
11
- Args:
12
- messages (list[dict]): A list of message dictionaries, e.g.:
13
- [{"role": "system", "content": "You are a helpful assistant."},
14
- {"role": "user", "content": "Count to 10"}]
15
- model_name (str): The identifier for the model (used in the base_url).
16
- max_tokens (int): Maximum tokens to generate.
17
-
18
- Yields:
19
- str: Tokens as they are generated.
20
- """
21
- # Construct a base URL that points to the model’s endpoint.
22
- base_url = f"https://api-inference.huggingface.co/models/{model_name}"
23
- token = os.getenv("HF_API_TOKEN")
24
- client = AsyncInferenceClient(base_url=base_url, token=token)
25
-
26
- stream = await client.chat.completions.create(
27
- messages=messages,
28
- stream=True,
29
- max_tokens=max_tokens,
30
- )
31
-
32
- async for chunk in stream:
33
- # Each chunk is expected to have a structure where the generated text is in:
34
- # chunk.choices[0].delta.content
35
- yield chunk.choices[0].delta.content or ""