Spaces:
Sleeping
Sleeping
| import os | |
| from openai import OpenAI | |
| from huggingface_hub import InferenceClient | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
class LLMProvider:
    """Route a citation-enforcing prompt to OpenAI or to Hugging Face.

    The backend is chosen once, in ``__init__``: the value ``"openai"``
    selects the OpenAI client, and every other value selects the Hugging Face
    ``InferenceClient`` with the Llama-3-70B-Instruct model.
    """

    # Engine actually requested from the OpenAI API in ``generate``.
    _OPENAI_ENGINE = "gpt-4o"
    # Value stored in ``model_name`` for the OpenAI backend. It is an alias
    # only (the original author notes it is what the logs show); it is never
    # sent to the API.
    _OPENAI_ALIAS = "gpt-oss-120b"
    _HF_MODEL = "meta-llama/Meta-Llama-3-70B-Instruct"
    _DEFAULT_PROVIDER = "llama"
    _TEMPERATURE = 0.2
    _HF_MAX_TOKENS = 800
    _CITATION_INSTRUCTION = (
        "You MUST cite the specific sources from the context provided using their IDs in brackets, "
        "like [S12] or [PAPER_001]. If a paper has a filename, use that. "
        "Always provide a 'References' list at the end."
    )

    def __init__(self, provider=None):
        """Create the client for the selected backend.

        Args:
            provider: Backend name. When falsy, the ``ACTIVE_LLM_PROVIDER``
                environment variable is used; when that is unset or blank,
                ``"llama"`` is used. The name is stripped and lower-cased
                before comparison.
        """
        # ``or`` chaining (rather than a getenv default) also covers an
        # environment variable that is set but empty.
        raw_name = provider or os.getenv("ACTIVE_LLM_PROVIDER") or ""
        self.provider = raw_name.strip().lower() or self._DEFAULT_PROVIDER

        if self.provider == "openai":
            print("🔗 Connecting directly to official OpenAI API...")
            self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
            self.model_name = self._OPENAI_ALIAS
        else:
            # Any name other than "openai" ends up here, not only "llama".
            print("🦙 Initializing Llama-3-70B via Hugging Face...")
            self.client = InferenceClient(api_key=os.getenv("HF_TOKEN"))
            self.model_name = self._HF_MODEL

    def _build_messages(self, prompt, context):
        """Return the system/user message pair shared by both backends."""
        instruction = self._CITATION_INSTRUCTION
        # The instruction is repeated inside the user turn as well as the
        # system turn; this mirrors the prompt layout the class has always
        # sent.
        full_query = f"{instruction}\n\nContext: {context}\n\nQuestion: {prompt}"
        return [
            {"role": "system", "content": instruction},
            {"role": "user", "content": full_query},
        ]

    def generate(self, prompt, context):
        """Ask the configured backend to answer ``prompt`` using ``context``.

        Args:
            prompt: The question; interpolated into the user message.
            context: Source material; interpolated into the user message.

        Returns:
            The text of the first choice in the response, or ``""`` when the
            backend returns a choice whose content is ``None``. If anything
            raises while calling the backend or reading the response, the
            string ``"Error using <provider>: <exception message>"`` is
            returned instead of propagating the exception.
        """
        messages = self._build_messages(prompt, context)
        try:
            if self.provider == "openai":
                response = self.client.chat.completions.create(
                    model=self._OPENAI_ENGINE,
                    messages=messages,
                    temperature=self._TEMPERATURE,
                )
            else:
                response = self.client.chat_completion(
                    messages=messages,
                    model=self.model_name,
                    max_tokens=self._HF_MAX_TOKENS,
                    temperature=self._TEMPERATURE,
                )
            content = response.choices[0].message.content
        except Exception as e:
            # Deliberately broad: callers receive failures as text, so every
            # backend/client error is folded into the returned string.
            return f"Error using {self.provider}: {str(e)}"
        # Keep the return type a string even when the reply has no content.
        return content if content is not None else ""