# NOTE(review): this file was recovered from a scraped Hugging Face Spaces page;
# the "Spaces: Sleeping" status-header residue has been removed.
import os

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

# Load variables from a local .env file into the process environment
# before any configuration values are read below.
load_dotenv()

# Default per-request HTTP timeout (seconds) for LLM calls;
# overridable through the LLM_TIMEOUT_SECONDS environment variable.
_DEFAULT_TIMEOUT = int(os.getenv("LLM_TIMEOUT_SECONDS", "30"))
def get_llm(timeout: int = _DEFAULT_TIMEOUT):
    """
    Return a configured ChatOpenAI instance based on environment variables.

    Supports OpenAI (default) and OpenAI-compatible endpoints such as
    DeepInfra, OpenRouter, and Groq.  Every provider is given an HTTP
    timeout so a hung LLM call cannot block the FastAPI server indefinitely.

    Environment variables:
        LLM_PROVIDER:   "OPENAI" (default), "QWEN", or any other value for
                        a generic OpenAI-compatible endpoint.
        LLM_API_KEY:    API key; falls back to OPENAI_API_KEY.
        LLM_BASE_URL:   Optional custom endpoint URL.
        LLM_MODEL_NAME: Optional model override (required for generic
                        providers).

    Args:
        timeout: Per-request HTTP timeout in seconds.

    Returns:
        A ChatOpenAI client configured for the selected provider.

    Raises:
        ValueError: If a generic (non-OPENAI, non-QWEN) provider is selected
            but LLM_MODEL_NAME is not set.
    """
    provider = os.getenv("LLM_PROVIDER", "OPENAI").upper()
    api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY")
    base_url = os.getenv("LLM_BASE_URL")
    model_name = os.getenv("LLM_MODEL_NAME")

    # Shared across all providers.  Use the current langchain_openai
    # parameter names (`api_key`, `timeout`) everywhere; the original code
    # mixed these with the deprecated `openai_api_key` / `openai_api_base` /
    # `request_timeout` aliases depending on the branch.
    common_kwargs = {
        "api_key": api_key,
        "timeout": timeout,
    }

    if provider == "OPENAI":
        return ChatOpenAI(
            model=model_name or "gpt-4-turbo",
            base_url=base_url,
            **common_kwargs,
        )

    if provider == "QWEN":
        # Qwen 2.5 Coder via OpenRouter (default base URL) or DeepInfra.
        return ChatOpenAI(
            model=model_name or "qwen/qwen-2.5-coder-32b-instruct",
            base_url=base_url or "https://openrouter.ai/api/v1",
            max_tokens=2048,
            temperature=0.2,
            **common_kwargs,
        )

    # Generic OpenAI-compatible endpoint: there is no sensible default
    # model, so fail fast with a clear message instead of letting the
    # client library raise an obscure validation error later.
    if not model_name:
        raise ValueError(
            "LLM_MODEL_NAME must be set when using a generic "
            f"OpenAI-compatible provider (LLM_PROVIDER={provider!r})."
        )
    return ChatOpenAI(
        model=model_name,
        base_url=base_url,
        **common_kwargs,
    )