Rename app (2).py to app.py
app (2).py → app.py  +0 -44
RENAMED
@@ -1,6 +1,5 @@
 import gradio as gr
 import os
-
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import Chroma
@@ -10,11 +9,9 @@ from langchain_community.llms import HuggingFacePipeline
 from langchain.chains import ConversationChain
 from langchain.memory import ConversationBufferMemory
 from langchain_community.llms import HuggingFaceEndpoint
-
 from pathlib import Path
 import chromadb
 from unidecode import unidecode
-
 from transformers import AutoTokenizer, AutoModelForMaskedLM
 import transformers
 import torch
@@ -28,10 +25,6 @@ model = AutoModelForMaskedLM.from_pretrained("google/muril-base-cased")
 
 # default_persist_directory = './chroma_HF/'
 list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1", \
-    "google/gemma-7b-it","google/gemma-2b-it", \
-    "HuggingFaceH4/zephyr-7b-beta", "HuggingFaceH4/zephyr-7b-gemma-v0.1", \
-    "meta-llama/Llama-2-7b-chat-hf", "microsoft/phi-2", \
-    "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "mosaicml/mpt-7b-instruct", "tiiuae/falcon-7b-instruct", \
     "google/flan-t5-xxl"
 ]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
@@ -94,42 +87,6 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             top_k = top_k,
             load_in_8bit = True,
         )
-    elif llm_model in ["HuggingFaceH4/zephyr-7b-gemma-v0.1","mosaicml/mpt-7b-instruct"]:
-        raise gr.Error("LLM model is too large to be loaded automatically on free inference endpoint")
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            temperature = temperature,
-            max_new_tokens = max_tokens,
-            top_k = top_k,
-        )
-    elif llm_model == "microsoft/phi-2":
-        # raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
-            temperature = temperature,
-            max_new_tokens = max_tokens,
-            top_k = top_k,
-            trust_remote_code = True,
-            torch_dtype = "auto",
-        )
-    elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
-            temperature = temperature,
-            max_new_tokens = 250,
-            top_k = top_k,
-        )
-    elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
-        raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-            temperature = temperature,
-            max_new_tokens = max_tokens,
-            top_k = top_k,
-        )
     else:
         llm = HuggingFaceEndpoint(
             repo_id=llm_model,
@@ -222,7 +179,6 @@ def format_chat_history(message, chat_history):
         formatted_chat_history.append(f"Assistant: {bot_message}")
     return formatted_chat_history
 
-
 def conversation(qa_chain, message, history):
     formatted_chat_history = format_chat_history(message, history)
     #print("formatted_chat_history",formatted_chat_history)
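Net effect of this commit: the per-model special cases (zephyr-7b-gemma, phi-2, TinyLlama, Llama-2, mpt-7b-instruct) are dropped from initialize_llmchain, and every model still listed in list_llm is served through the surviving generic HuggingFaceEndpoint branch. Below is a minimal sketch of the resulting code path; the build_llm helper and its parameter defaults are illustrative assumptions, not part of this diff.

import os
from langchain_community.llms import HuggingFaceEndpoint

# Models remaining in list_llm after this commit (from the -28,10 hunk above)
list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1",
            "mistralai/Mistral-7B-Instruct-v0.1", "google/flan-t5-xxl"]
list_llm_simple = [os.path.basename(llm) for llm in list_llm]

def build_llm(llm_model, temperature=0.7, max_tokens=1024, top_k=3):
    # Hypothetical helper mirroring the surviving `else:` branch of
    # initialize_llmchain, now the single path for every listed model.
    # HuggingFaceEndpoint reads HUGGINGFACEHUB_API_TOKEN from the environment.
    return HuggingFaceEndpoint(
        repo_id=llm_model,
        temperature=temperature,
        max_new_tokens=max_tokens,
        top_k=top_k,
    )

llm = build_llm(list_llm[0])  # e.g. Mistral-7B-Instruct-v0.2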