import logging
import os
import sys

from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceEndpoint, HuggingFacePipeline

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
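# Make the project root importable so that `app.config` resolves regardless of
# the directory this module is loaded from.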
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from app.config import HF_API_KEY, LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATURE, MAX_TOKENS


def get_llm():
    """Initialize and return the language model."""
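    # Ensure a writable cache directory for downloaded models (the /app/models
    # path assumes the containerized deployment this app appears to target).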
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            logger.warning(f"Could not create cache directory: {e}")
            cache_dir = None
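    # Resolve the Hugging Face token: environment variables take precedence,
    # then the value from app.config.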
    api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") or os.getenv("HF_API_KEY", "") or HF_API_KEY or ""
    logger.info(f"Using model: {LLM_MODEL}")

    try:
        from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

        logger.info(f"Loading model {LLM_MODEL} as local pipeline")

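        # Fallback ladder: try the configured model first, then progressively
        # smaller public models that are more likely to fit in memory.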
        models_to_try = [
            LLM_MODEL,
            "distilgpt2",
            "gpt2",
            "EleutherAI/gpt-neo-125M"
        ]

        last_error = None

        for model_name in models_to_try:
            try:
                logger.info(f"Attempting to load model: {model_name}")

                try:
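                    # First attempt: load tokenizer and model explicitly so we
                    # can pass loading options such as low_cpu_mem_usage.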
                    tokenizer = AutoTokenizer.from_pretrained(
                        model_name,
                        token=api_key if api_key else None,
                        cache_dir=cache_dir,
                        trust_remote_code=False
                    )
                    model = AutoModelForCausalLM.from_pretrained(
                        model_name,
                        token=api_key if api_key else None,
                        cache_dir=cache_dir,
                        trust_remote_code=False,
                        low_cpu_mem_usage=True
                    )

                    pipe = pipeline(
                        "text-generation",
                        model=model,
                        tokenizer=tokenizer,
                        max_length=MAX_TOKENS,
                        temperature=DEFAULT_TEMPERATURE,
                        do_sample=True,  # temperature is ignored unless sampling is enabled
                        device=-1  # force CPU
                    )

                    logger.info(f"Successfully loaded model: {model_name}")
                    return HuggingFacePipeline(pipeline=pipe)
                except Exception as e:
                    logger.warning(f"Error loading {model_name} with explicit model/tokenizer: {e}")
                    last_error = e

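                # Second attempt: let pipeline() resolve the model by name and
                # handle tokenizer loading itself.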
                pipe = pipeline(
                    "text-generation",
                    model=model_name,
                    max_length=MAX_TOKENS,
                    temperature=DEFAULT_TEMPERATURE,
                    do_sample=True,
                    token=api_key if api_key else None,
                    device=-1
                )

                logger.info(f"Successfully loaded model: {model_name} via direct pipeline")
                return HuggingFacePipeline(pipeline=pipe)

            except Exception as e:
                logger.warning(f"Error loading model {model_name}: {e}")
                last_error = e
                continue

        logger.error(f"All models failed to load. Last error: {last_error}")
        raise last_error

    except Exception as e:
        logger.warning(f"Error creating local pipeline: {e}")
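    # Fallback 1: query a hosted model via the Hugging Face Inference API.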
    try:
        logger.info("Attempting to use HuggingFaceEndpoint")
        return HuggingFaceEndpoint(
            repo_id="gpt2",
            max_new_tokens=MAX_TOKENS,
            temperature=DEFAULT_TEMPERATURE,
            huggingfacehub_api_token=api_key
        )
    except Exception as endpoint_error:
        logger.warning(f"HuggingFaceEndpoint failed: {endpoint_error}")

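    # Fallback 2: a canned-response mock so the app can still start.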
    from langchain_community.llms.fake import FakeListLLM
    logger.warning("Using mock LLM as fallback")
    return FakeListLLM(
        responses=[
            "I'm running in fallback mode due to model loading issues. I have limited capabilities right now.",
            "I can't access the language model currently. Please check the Space logs for more information.",
            "I'm operating with a simplified model. For better performance, try running this app locally with proper models configured."
        ]
    )


def get_embeddings():
    """Initialize and return the embeddings model."""
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            logger.warning(f"Could not create cache directory: {e}")
            cache_dir = None
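    # Fallback ladder of progressively smaller sentence-transformers models.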
    embedding_models_to_try = [
        EMBEDDING_MODEL,
        "sentence-transformers/all-MiniLM-L6-v2",
        "sentence-transformers/paraphrase-MiniLM-L3-v2",
        "sentence-transformers/paraphrase-albert-small-v2"
    ]

    for model_name in embedding_models_to_try:
        try:
            logger.info(f"Loading embeddings model: {model_name}")
            return HuggingFaceEmbeddings(
                model_name=model_name,
                cache_folder=cache_dir,
                encode_kwargs={"normalize_embeddings": True},
                model_kwargs={"device": "cpu"}
            )
        except Exception as e:
            logger.warning(f"Error initializing embeddings with {model_name}: {e}")
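    # Fallback 1: bypass LangChain and wrap sentence-transformers directly in a
    # minimal object that duck-types LangChain's Embeddings interface.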
    try:
        from sentence_transformers import SentenceTransformer
        logger.info("Loading embeddings with SentenceTransformer directly")
        model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")

        class DirectEmbeddings:
            def embed_documents(self, texts):
                return model.encode(texts, normalize_embeddings=True).tolist()

            def embed_query(self, text):
                return model.encode(text, normalize_embeddings=True).tolist()

        return DirectEmbeddings()
    except Exception as e:
        logger.warning(f"Error with direct SentenceTransformer: {e}")

    # Fallback 2: random placeholder embeddings sized to match MiniLM (384 dims).
    from langchain_community.embeddings.fake import FakeEmbeddings
    logger.warning("Using mock embeddings as fallback")
    return FakeEmbeddings(size=384)


def get_chat_model():
    """
    Create a chat-like interface using a regular LLM.
    This is necessary because many free HF models don't have chat interfaces.
    """
    llm = get_llm()

    chat_template = """
Context: {context}

Chat History:
{chat_history}

User: {question}
AI Assistant:
"""

    prompt = PromptTemplate(
        input_variables=["context", "chat_history", "question"],
        template=chat_template
    )

    return LLMChain(llm=llm, prompt=prompt)
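

if __name__ == "__main__":
    # Minimal smoke test: a sketch only, assuming app.config is importable and
    # that the returned chain exposes LLMChain's keyword-argument `run` API.
    chain = get_chat_model()
    print(chain.run(context="", chat_history="", question="Hello! What can you do?"))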