README.md CHANGED

@@ -10,6 +10,7 @@ pinned: false
 preload_from_hub:
   - "microsoft/phi-2"
   - "BAAI/bge-small-en-v1.5"
+  - "HuggingFaceH4/zephyr-7b-alpha"
+  - "meta-llama/Meta-Llama-3-8B"
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED
@@ -20,12 +20,13 @@ CHEAPMODE = torch.cuda.is_available()
 # LLM = "HuggingFaceH4/zephyr-7b-alpha" if not CHEAPMODE else "microsoft/phi-2"
 
 config = {
-    "LLM": "meta-llama/Meta-Llama-3-8B",
-
+    # "LLM": "meta-llama/Meta-Llama-3-8B",
+    "LLM": "microsoft/phi-2",
+    # "LLM": "HuggingFaceH4/zephyr-7b-alpha",
     "embeddings": "BAAI/bge-small-en-v1.5",
     "similarity_top_k": 2,
     "context_window": 4048,
-    "max_new_tokens":
+    "max_new_tokens": 200,
     "temperature": 0.7,
     "top_k": 5,
     "top_p": 0.95,
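Note: load_RAG_pipeline itself is untouched by this hunk, so for orientation only, here is a minimal sketch, assuming llama_index's HuggingFaceLLM wrapper, of how these config keys would typically reach the model. The helper name build_llm and the device_map choice are assumptions, not code from this Space.

from llama_index.llms.huggingface import HuggingFaceLLM

def build_llm(config):
    # Hypothetical helper: maps the config dict above onto HuggingFaceLLM.
    # "max_new_tokens" now has a value (200), so generation stops after at
    # most 200 tokens; the sampling params travel in generate_kwargs.
    return HuggingFaceLLM(
        model_name=config["LLM"],
        tokenizer_name=config["LLM"],
        context_window=config["context_window"],
        max_new_tokens=config["max_new_tokens"],
        generate_kwargs={
            "temperature": config["temperature"],
            "top_k": config["top_k"],
            "top_p": config["top_p"],
            "do_sample": True,
        },
        device_map="auto",
    )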
@@ -42,17 +43,17 @@ title = "Ask my thesis: Intelligent Automation for AI-Driven Document Understand
 title = center_element(title)
 description = """Chat with the thesis manuscript by asking questions and receive answers with reference to the page.
 
-<div class="
+<div class="center">
 <a href="https://jordy-vl.github.io/assets/phdthesis/VanLandeghem_Jordy_PhD-thesis.pdf">
 <img src="https://ideogram.ai/api/images/direct/cc3Um6ClQkWJpVdXx6pWVA.png"
-title="Thesis.pdf" alt="Ideogram image generated with prompt engineering"/></a>
-</div>
+title="Thesis.pdf" alt="Ideogram image generated with prompt engineering" width="500" class="center"/></a>
+</div> Click the visual above to be redirected to the PDF of the manuscript.
 
 Technology used: [Llama-index](https://www.llamaindex.ai/), OS LLMs from HuggingFace
 
-Spoiler: a RAG application with a >1B LLM and online vector store can be quite slow on a 290 page document ⏳
+Spoiler: a quickly hacked together RAG application with a >1B LLM and online vector store can be quite slow on a 290 page document ⏳ (10s+)
 """
-
+
 description = center_element(description)
 
 def messages_to_prompt(messages):
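center_element is called on both title and description but is never shown in the diff; a plausible, purely hypothetical implementation:

def center_element(html: str) -> str:
    # Hypothetical: wrap the HTML/markdown block in a centered div for Gradio.
    return f"<div style='text-align: center;'>{html}</div>"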
@@ -105,6 +106,7 @@ def load_RAG_pipeline(config):
     # Llama-index
     Settings.llm = llm
     Settings.embed_model = HuggingFaceEmbedding(model_name=config["embeddings"])
+    print(Settings)
     Settings.chunk_size = config["chunk_size"]
     Settings.chunk_overlap = config["chunk_overlap"]
 
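The rest of load_RAG_pipeline is outside this hunk. A minimal sketch of how a query engine is typically built once the global Settings above are populated; the "data" directory and the helper name are assumptions:

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

def build_query_engine(config):
    # Assumed layout: the thesis PDF lives in a local "data" directory.
    documents = SimpleDirectoryReader("data").load_data()
    # Embedding model, chunk size, and chunk overlap are picked up from Settings.
    index = VectorStoreIndex.from_documents(documents)
    return index.as_query_engine(similarity_top_k=config["similarity_top_k"])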
@@ -125,23 +127,16 @@ default_query_engine = load_RAG_pipeline(config)
 
 # These are placeholder functions to simulate the behavior of the RAG setup.
 # You would need to implement these with the actual logic to retrieve and generate answers based on the document.
-def get_answer(question,
+def get_answer(question, config, query_engine=default_query_engine):
     # Here you should implement the logic to generate an answer based on the question and the document.
     # For example, you could use a machine learning model for RAG.
     # answer = "This is a placeholder answer."
     # https://docs.llamaindex.ai/en/stable/module_guides/supporting_modules/settings/#setting-local-configurations
 
     # if temperature or nucleus sampling or max_tokens != as in config, recall query engine
-    if (
-        temperature != config["temperature"]
-        or nucleus_sampling != config["top_p"]
-        or max_tokens != config["max_new_tokens"]
-    ):
-        config["temperature"] = temperature
-        config["top_p"] = nucleus_sampling
-        config["max_new_tokens"] = max_tokens
-        query_engine = load_RAG_pipeline(config)
+
     response = query_engine.query(question)
+    print(f"A: {response}")
     return response
 
 
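get_answer now returns the raw llama_index Response object (printed for debugging), which is what get_answer_page receives. That helper is also outside the hunk; a hedged sketch of how the answer page could be recovered from source-node metadata, with the "page_label" key being the convention of llama_index's PDF readers:

def page_of(response):
    # Sketch only: each retrieved node carries the metadata of its source
    # document; PDF readers usually record the originating page there.
    if response.source_nodes:
        return response.source_nodes[0].node.metadata.get("page_label")
    return None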
@@ -156,32 +151,87 @@ def get_answer_page(response):
 
 # Create the gr.Interface function
 def ask_my_thesis(
-    question,
+    question,
+    LLM=config["LLM"],
+    embeddings=config["embeddings"],
+    similarity_top_k=config["similarity_top_k"],
+    context_window=config["context_window"],
+    max_new_tokens=config["max_new_tokens"],
+    temperature=config["temperature"],
+    top_k=config["top_k"],
+    top_p=config["top_p"],
+    chunk_size=config["chunk_size"],
+    chunk_overlap=config["chunk_overlap"],
 ):
     print(f"Got Q: {question}")
-
+    query_engine = default_query_engine
+
+    # if any change in kwargs
+    # Check if any of the kwargs have changed
+    if (
+        temperature != config["temperature"]
+        or top_p != config["top_p"]
+        or max_new_tokens != config["max_new_tokens"]
+        or LLM != config["LLM"]
+        or embeddings != config["embeddings"]
+        or similarity_top_k != config["similarity_top_k"]
+        or context_window != config["context_window"]
+        or top_k != config["top_k"]
+        or chunk_size != config["chunk_size"]
+        or chunk_overlap != config["chunk_overlap"]
+    ):
+        # Update the config dictionary with the new values
+        config["temperature"] = temperature
+        config["top_p"] = top_p
+        config["max_new_tokens"] = max_new_tokens
+        # config["LLM"] = LLM
+        # config["embeddings"] = embeddings
+        config["similarity_top_k"] = similarity_top_k
+        config["context_window"] = context_window
+        config["top_k"] = top_k
+        config["chunk_size"] = chunk_size
+        config["chunk_overlap"] = chunk_overlap
+        query_engine = load_RAG_pipeline(config)
+
+    answer = get_answer(question, config, query_engine=query_engine)
     image, answer_page = get_answer_page(answer)
-    return answer, image, answer_page
+    return answer.response, image, answer_page
 
 
 # Set up the interface options based on the design in the image.
 output_image = gr.Image(label="Answer Page")
 
 # examples
-examples = [
+examples = [
+    ["What model is state-of-the-art on DUDE?"],
+    ["Why is knowledge distillation interesting?"],
+    ["What is ANLS?"],
+]
+# Define additional Gradio input elements
+additional_inputs = [
+    # gr.Input("text", label="Question"),
+    # gr.Input("text", label="LLM", value=config["LLM"]),
+    # gr.Input("text", label="Embeddings", value=config["embeddings"]),
+    gr.Slider(1, 5, value=config["similarity_top_k"], label="Similarity Top K"),
+    gr.Slider(512, 8048, value=config["context_window"], label="Context Window"),
+    gr.Slider(20, 250, value=config["max_new_tokens"], label="Max New Tokens"),
+    gr.Slider(0, 1, value=config["temperature"], label="Temperature"),
+    gr.Slider(1, 10, value=config["top_k"], label="Top K"),
+    gr.Slider(0, 1, value=config["top_p"], label="Nucleus Sampling"),
+    gr.Slider(128, 4024, value=config["chunk_size"], label="Chunk Size"),
+    gr.Slider(0, 200, value=config["chunk_overlap"], label="Chunk Overlap"),
+]
 
 iface = gr.Interface(
     fn=ask_my_thesis,
     inputs=[gr.Textbox(label="Question", placeholder="Type your question here...")],
-    additional_inputs=
-        gr.Slider(0, 1, value=0.7, label="Temperature"),
-        gr.Slider(0, 1, value=0.95, label="Nucleus Sampling"),
-        gr.Slider(1, 500, value=150, label="Max Generated Number of Tokens"),
-    ],
+    additional_inputs=additional_inputs,
     outputs=[gr.Textbox(label="Answer"), output_image, gr.Label()],
+    examples=examples,
     title=title,
     description=description,
-    allow_flagging="
+    allow_flagging="auto",
+    cache_examples=True,
 )
 # https://github.com/gradio-app/gradio/issues/4309
 
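The launch call lies beyond the end of the diff; a typical closing for such a Space is sketched below, with queue() being an assumption rather than confirmed code. Worth noting: with cache_examples=True, Gradio runs ask_my_thesis on all three examples at startup to cache their outputs, so a slow RAG pipeline pays that cost once at boot rather than per visitor.

if __name__ == "__main__":
    # queue() serializes long-running requests; launch() starts the app.
    iface.queue().launch()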