Spaces:
Runtime error
Runtime error
| from llama_index.llms.huggingface import HuggingFaceLLM, HuggingFaceInferenceAPI | |
| from llama_index.llms.openai import OpenAI | |
| from llama_index.llms.replicate import Replicate | |
| from dotenv import load_dotenv | |
| import os | |
| import streamlit as st | |
| load_dotenv() | |
| # download the model from the Hugging Face Hub and run it locally | |
| # llm_mixtral_8x7b = HuggingFaceLLM(model_name="mistralai/Mixtral-8x7B-Instruct-v0.1") | |
| # llm_llama_2_7b_chat = HuggingFaceInferenceAPI( | |
| # model_name="meta-llama/Llama-2-7b-chat-hf", | |
| # token=os.getenv("HUGGINGFACE_API_TOKEN"), | |
| # ) | |
# Registry of the models this app can load.
# Keys are model names; values name the provider ("source") that serves them:
#     {"model_name": "source"}
integrated_llms = {
    "gpt-3.5-turbo-0125": "openai",
    "meta/llama-2-13b-chat": "replicate",
    "mistralai/Mistral-7B-Instruct-v0.2": "huggingface",
    # Disabled entries, kept to record why they are not offered:
    # "mistralai/Mixtral-8x7B-v0.1": "huggingface",  # 93 GB model
    # "meta-llama/Meta-Llama-3-8B": "huggingface",  # too large (>10G) for the llama-index HF inference client to load
}
def load_llm(model_name: str, source: str = "huggingface"):
    """Build the LLM client for one of the integrated models.

    Args:
        model_name: Model identifier; must be a key of ``integrated_llms``.
        source: Provider serving the model. Matched by prefix against
            ``"openai"``, ``"replicate"`` and ``"huggingface"``.

    Returns:
        The constructed client, or ``None`` when the model is not
        integrated, the source is not recognised, or building the client
        raised (the error is printed, not propagated).
    """
    print("model_name: ", model_name, "source: ", source)

    if model_name not in integrated_llms:
        return None

    try:
        if source.startswith("openai"):
            return OpenAI(
                model=model_name,
                api_key=st.session_state.openai_api_key,
                temperature=0.0,
            )

        if source.startswith("replicate"):
            # `prompt_key` names the prompt field in the model input; it is
            # not a credential. The replicate client authenticates through
            # the REPLICATE_API_TOKEN environment variable, so export the
            # session's token there instead.
            # NOTE(review): this is process-wide; confirm that is acceptable
            # if several users share one server process.
            replicate_token = st.session_state.replicate_api_token
            if replicate_token:
                os.environ["REPLICATE_API_TOKEN"] = replicate_token
            return Replicate(
                model=model_name,
                is_chat_model=True,
                additional_kwargs={"max_new_tokens": 250},
                temperature=0.0,
            )

        if source.startswith("huggingface"):
            return HuggingFaceInferenceAPI(
                model_name=model_name,
                token=st.session_state.hf_token,
            )
    except Exception as e:
        # Best-effort loader: report the failure and fall through to None
        # so the caller can handle a missing model.
        print(e)

    # Unrecognised source, or client construction failed.
    return None