from huggingface_hub import InferenceClient


class Mistral_7b:
    """Thin wrapper around the Hugging Face Inference API for
    mistralai/Mistral-7B-Instruct-v0.2, with streaming and blocking access."""

    def __init__(self, token):
        # Authenticated client for the hosted inference endpoint.
        self.client = InferenceClient(token=token)
        self.model_id = "mistralai/Mistral-7B-Instruct-v0.2"

    def generate_stream(self, prompt, max_tokens=1000, temperature=0.1):
        """Yield response text fragments for *prompt* as they arrive.

        Any failure is surfaced as a yielded error string rather than
        raised, so consumers can simply drain the generator.
        """
        try:
            stream = self.client.chat_completion(
                model=self.model_id,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                temperature=temperature,
                stream=True,
                extra_body={"reasoning": "none"},
            )
            for chunk in stream:
                if not chunk.choices:
                    continue
                piece = chunk.choices[0].delta.content
                if piece:
                    yield piece
        except Exception as exc:
            yield f" Mistral_7b Error: {exc}"

    def generate(self, prompt, max_tokens=1000, temperature=0.1):
        """Run the stream to completion and return the concatenated text."""
        fragments = self.generate_stream(
            prompt, max_tokens=max_tokens, temperature=temperature
        )
        return "".join(fragments)