```python
import os
from typing import Optional

from dotenv import load_dotenv
from huggingface_hub import InferenceClient

load_dotenv()


def gemma_predict(combined_information, model_name, config: Optional[dict]):
    # Authenticate against the Hugging Face Inference API.
    HF_token = os.environ["HF_TOKEN"]
    client = InferenceClient(model_name, token=HF_token)

    # Stream the generation token by token.
    stream = client.text_generation(
        prompt=combined_information,
        details=True,
        stream=True,
        max_new_tokens=config["max_output_tokens"],
        temperature=config["temperature"],
        return_full_text=False,
    )

    # Accumulate streamed tokens and stop once the
    # end-of-sequence marker appears.
    output = ""
    for response in stream:
        output += response.token.text
        if "<eos>" in output:
            output = output.split("<eos>")[0]
            break
    return output
```
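For context, here is a minimal usage sketch. The model ID, prompt, and generation settings below are assumptions for illustration, not values from this document; `HF_TOKEN` must be set in the environment (or in a `.env` file picked up by `load_dotenv()`).

```python
# Hypothetical example values; adjust to your own model and prompt.
config = {"max_output_tokens": 512, "temperature": 0.7}

answer = gemma_predict(
    combined_information=(
        "Answer the question using the context below.\n\n"
        "Context: ...\n\nQuestion: ..."
    ),
    model_name="google/gemma-2b-it",  # assumed Gemma instruct checkpoint
    config=config,
)
print(answer)
```

Because the tokens are accumulated as they stream in and generation stops at the first `<eos>`, the function returns only the model's answer, without the prompt (`return_full_text=False`) or any trailing end-of-sequence text.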