added hf
Browse files- app.py +43 -22
- requirements.txt +3 -1
app.py
CHANGED
|
@@ -5,10 +5,12 @@ import streamlit as st
|
|
| 5 |
|
| 6 |
from llama_index.llms.gemini import Gemini
|
| 7 |
from llama_index.llms.huggingface import HuggingFaceLLM
|
|
|
|
| 8 |
from llama_index.llms.mistralai import MistralAI
|
| 9 |
from llama_index.llms.openai import OpenAI
|
| 10 |
|
| 11 |
from llama_index.embeddings.openai import OpenAIEmbedding
|
|
|
|
| 12 |
|
| 13 |
from llama_index.core import (
|
| 14 |
VectorStoreIndex,
|
|
@@ -24,16 +26,6 @@ MAX_OUTPUT_TOKENS = 2048
|
|
| 24 |
def main():
|
| 25 |
with st.sidebar:
|
| 26 |
st.title('Document Summarization and QA System')
|
| 27 |
-
# st.markdown('''
|
| 28 |
-
# ## About this application
|
| 29 |
-
# Upload a pdf to ask questions about it. This retrieval-augmented generation (RAG) workflow uses:
|
| 30 |
-
# - [Streamlit](https://streamlit.io/)
|
| 31 |
-
# - [LlamaIndex](https://docs.llamaindex.ai/en/stable/)
|
| 32 |
-
# - [OpenAI](https://platform.openai.com/docs/models)
|
| 33 |
-
# ''')
|
| 34 |
-
|
| 35 |
-
# st.write('Made by ***Nate Mahynski***')
|
| 36 |
-
# st.write('nathan.mahynski@nist.gov')
|
| 37 |
|
| 38 |
# Select Provider
|
| 39 |
provider = st.selectbox(
|
|
@@ -54,11 +46,23 @@ def main():
|
|
| 54 |
else:
|
| 55 |
llm_list = []
|
| 56 |
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
# Temperature
|
| 64 |
temperature = st.slider(
|
|
@@ -78,7 +82,7 @@ def main():
|
|
| 78 |
# Enter LLM API Key
|
| 79 |
llm_key = st.text_input(
|
| 80 |
"Enter your LLM API Key",
|
| 81 |
-
value=None,
|
| 82 |
)
|
| 83 |
|
| 84 |
# Create LLM
|
|
@@ -94,11 +98,26 @@ def main():
|
|
| 94 |
)
|
| 95 |
Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
|
| 96 |
Settings.num_output = MAX_OUTPUT_TOKENS
|
| 97 |
-
Settings.context_window = 4096 # max possible
|
| 98 |
Settings.embed_model = OpenAIEmbedding()
|
|
|
|
| 99 |
elif provider == 'huggingface':
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
else:
|
| 103 |
raise NotImplementedError(f"{provider} is not supported yet")
|
| 104 |
|
|
@@ -144,7 +163,7 @@ def main():
|
|
| 144 |
# Instructions
|
| 145 |
|
| 146 |
1. Obtain an [API Key](https://cloud.llamaindex.ai/api-key) from LlamaParse to parse your document.
|
| 147 |
-
2. Obtain a similar API Key from your preferred LLM provider.
|
| 148 |
3. Make selections at the left and upload a document to use as context.
|
| 149 |
4. Begin asking questions below!
|
| 150 |
"""
|
|
@@ -169,8 +188,10 @@ def main():
|
|
| 169 |
|
| 170 |
if __name__ == '__main__':
|
| 171 |
# Global configurations
|
| 172 |
-
from llama_index.core import set_global_handler
|
| 173 |
-
set_global_handler("langfuse")
|
|
|
|
|
|
|
| 174 |
st.set_page_config(layout="wide")
|
| 175 |
|
| 176 |
main()
|
|
|
|
| 5 |
|
| 6 |
from llama_index.llms.gemini import Gemini
|
| 7 |
from llama_index.llms.huggingface import HuggingFaceLLM
|
| 8 |
+
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
|
| 9 |
from llama_index.llms.mistralai import MistralAI
|
| 10 |
from llama_index.llms.openai import OpenAI
|
| 11 |
|
| 12 |
from llama_index.embeddings.openai import OpenAIEmbedding
|
| 13 |
+
from llama_index.embeddings.huggingface import HuggingFaceEmbedding, HuggingFaceInferenceAPIEmbedding
|
| 14 |
+
from transformers import AutoTokenizer  # used below by Settings.tokenizer for the huggingface provider; was missing from this commit's imports
|
| 14 |
|
| 15 |
from llama_index.core import (
|
| 16 |
VectorStoreIndex,
|
|
|
|
| 26 |
def main():
|
| 27 |
with st.sidebar:
|
| 28 |
st.title('Document Summarization and QA System')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# Select Provider
|
| 31 |
provider = st.selectbox(
|
|
|
|
| 46 |
else:
|
| 47 |
llm_list = []
|
| 48 |
|
| 49 |
+
if provider == 'huggingface':
|
| 50 |
+
llm_name = st.text_input(
|
| 51 |
+
"Model as 'namespace/model-name', e.g. google/gemma-2-9b",
|
| 52 |
+
value=None,
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
# Also give the user the option for different embedding models, too
|
| 56 |
+
embed_name = st.text_input(
|
| 57 |
+
label="Embedding model as 'namespace/model-name', e.g. BAAI/bge-small-en-v1.5",
|
| 58 |
+
value="BAAI/bge-small-en-v1.5",
|
| 59 |
+
)
|
| 60 |
+
else:
|
| 61 |
+
llm_name = st.selectbox(
|
| 62 |
+
label="Select LLM Model",
|
| 63 |
+
options=llm_list,
|
| 64 |
+
index=0
|
| 65 |
+
)
|
| 66 |
|
| 67 |
# Temperature
|
| 68 |
temperature = st.slider(
|
|
|
|
| 82 |
# Enter LLM API Key
|
| 83 |
llm_key = st.text_input(
|
| 84 |
"Enter your LLM API Key",
|
| 85 |
+
value=None,  # SECURITY: never commit a real API key as a default — the previous default embedded a live LlamaParse key (llx-…); revoke that key immediately and read it from an env var or st.secrets instead
|
| 86 |
)
|
| 87 |
|
| 88 |
# Create LLM
|
|
|
|
| 98 |
)
|
| 99 |
Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
|
| 100 |
Settings.num_output = MAX_OUTPUT_TOKENS
|
|
|
|
| 101 |
Settings.embed_model = OpenAIEmbedding()
|
| 102 |
+
Settings.context_window = 4096 # max possible
|
| 103 |
elif provider == 'huggingface':
|
| 104 |
+
if llm_name is not None and embed_name is not None:
|
| 105 |
+
os.environ['HFTOKEN'] = str(llm_key)
|
| 106 |
+
Settings.llm = HuggingFaceInferenceAPI(
|
| 107 |
+
model_name=llm_name,
|
| 108 |
+
token=os.environ.get("HFTOKEN"),
|
| 109 |
+
temperature=temperature,
|
| 110 |
+
max_tokens=MAX_OUTPUT_TOKENS
|
| 111 |
+
)
|
| 112 |
+
Settings.tokenizer = AutoTokenizer.from_pretrained(
|
| 113 |
+
llm_name,
|
| 114 |
+
token=os.environ.get("HFTOKEN"),
|
| 115 |
+
)
|
| 116 |
+
Settings.num_output = MAX_OUTPUT_TOKENS
|
| 117 |
+
Settings.embed_model = HuggingFaceInferenceAPIEmbedding(
|
| 118 |
+
model_name=embed_name
|
| 119 |
+
)
|
| 120 |
+
# Settings.context_window = 4096
|
| 121 |
else:
|
| 122 |
raise NotImplementedError(f"{provider} is not supported yet")
|
| 123 |
|
|
|
|
| 163 |
# Instructions
|
| 164 |
|
| 165 |
1. Obtain an [API Key](https://cloud.llamaindex.ai/api-key) from LlamaParse to parse your document.
|
| 166 |
+
2. Obtain a similar API Key from your preferred LLM provider. Note, if you are using [Hugging Face](https://huggingface.co/models) you may need to request access to a model if it is gated.
|
| 167 |
3. Make selections at the left and upload a document to use as context.
|
| 168 |
4. Begin asking questions below!
|
| 169 |
"""
|
|
|
|
| 188 |
|
| 189 |
if __name__ == '__main__':
|
| 190 |
# Global configurations
|
| 191 |
+
# from llama_index.core import set_global_handler
|
| 192 |
+
# set_global_handler("langfuse")
|
| 193 |
+
# Also add API Key for this if using
|
| 194 |
+
|
| 195 |
st.set_page_config(layout="wide")
|
| 196 |
|
| 197 |
main()
|
requirements.txt
CHANGED
|
@@ -8,4 +8,6 @@ llama-index-llms-mistralai
|
|
| 8 |
llama-index-llms-openai
|
| 9 |
tiktoken
|
| 10 |
llama-parse
|
| 11 |
-
llama-index-callbacks-langfuse
|
|
|
|
|
|
|
|
|
| 8 |
llama-index-llms-openai
|
| 9 |
tiktoken
|
| 10 |
llama-parse
|
| 11 |
+
llama-index-callbacks-langfuse
|
| 12 |
+
llama-index-llms-huggingface-api
|
| 13 |
+
llama-index-llms-huggingface
|
| 14 |
+
llama-index-embeddings-huggingface  # app.py imports HuggingFaceEmbedding / HuggingFaceInferenceAPIEmbedding
|
| 15 |
+
transformers  # app.py calls AutoTokenizer.from_pretrained for the huggingface provider
|