Spaces:

Ankitajadhav
/

Whats_Cooking

Runtime error

App Files Community

Ankitajadhav committed on Jul 8, 2024

Commit

bda7944

verified ·

1 Parent(s): 9ccd468

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -34

app.py CHANGED Viewed

@@ -5,18 +5,22 @@ from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 import chromadb
 from sentence_transformers import SentenceTransformer
 # Initialize the Llama model
-llm = Llama(
-    # model_path=hf_hub_download(
-    #     repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
-    #     filename="Phi-3-mini-4k-instruct-q4.gguf",
-    # ),
-    model_path = "./models/Phi-3-mini-4k-instruct-gguf",
-    # model_path = "NicholasJohn/OpenBioLLM-Llama3-8B-Q5_K_M.gguf",
-    n_ctx=2048,
-    n_gpu_layers=50,  # Adjust based on your VRAM
-)
 # Initialize ChromaDB Vector Store
 class VectorStore:
@@ -38,9 +42,6 @@ class VectorStore:
 # Example initialization (assuming you've already populated the vector store)
 vector_store = VectorStore("embedding_vector")
-# Populate with your data if not already done
-# vector_store.populate_vectors(your_texts, your_ids)
 def generate_text(
     message,
     history: list[tuple[str, str]],
@@ -58,40 +59,40 @@ def generate_text(
         input_prompt += f"{interaction[0]} [/INST] {interaction[1]} </s><s> [INST] "
     input_prompt += f"{message} [/INST] "
     temp = ""
-    output = llm(
-        input_prompt,
-        temperature=temperature,
-        top_p=top_p,
-        top_k=40,
-        repeat_penalty=1.1,
-        max_tokens=max_tokens,
-        stop=["", " \n", "ASSISTANT:", "USER:", "SYSTEM:"],
-        stream=True,
-    )
-    for out in output:
-        temp += out["choices"][0]["text"]
-        yield temp
 # Define the Gradio interface
 demo = gr.ChatInterface(
     generate_text,
-    title="llama-cpp-python on GPU with ChromaDB",
-    description="Running LLM with context retrieval from ChromaDB",
     examples=[
         ["I have leftover rice, what can I make out of it?"],
         ["Can I make lunch for two people with this?"],
     ],
     cache_examples=False,
     retry_btn=None,
     undo_btn="Delete Previous",
     clear_btn="Clear",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-    ],
 )
 if __name__ == "__main__":

 from huggingface_hub import hf_hub_download
 import chromadb
 from sentence_transformers import SentenceTransformer
+import logging
+# Initialize logging
+logging.basicConfig(level=logging.INFO)
 # Initialize the Llama model
+try:
+    llm = Llama(
+        model_path="./models/Phi-3-mini-4k-instruct-gguf",
+        n_ctx=2048,
+        n_gpu_layers=50,  # Adjust based on your VRAM
+    )
+    logging.info("Llama model loaded successfully.")
+except Exception as e:
+    logging.error(f"Error loading Llama model: {e}")
+    raise
 # Initialize ChromaDB Vector Store
 class VectorStore:
 # Example initialization (assuming you've already populated the vector store)
 vector_store = VectorStore("embedding_vector")
 def generate_text(
     message,
     history: list[tuple[str, str]],
         input_prompt += f"{interaction[0]} [/INST] {interaction[1]} </s><s> [INST] "
     input_prompt += f"{message} [/INST] "
+    logging.info("Input prompt:\n%s", input_prompt)  # Debugging output
     temp = ""
+    try:
+        output = llm(
+            input_prompt,
+            temperature=temperature,
+            top_p=top_p,
+            top_k=40,
+            repeat_penalty=1.1,
+            max_tokens=max_tokens,
+            stop=["", " \n", "ASSISTANT:", "USER:", "SYSTEM:"],
+            stream=True,
+        )
+        for out in output:
+            temp += out["choices"][0]["text"]
+            logging.info("Model output:\n%s", temp)  # Log model output
+            yield temp
+    except Exception as e:
+        logging.error(f"Error during text generation: {e}")
+        yield "An error occurred during text generation."
 # Define the Gradio interface
 demo = gr.ChatInterface(
     generate_text,
     examples=[
         ["I have leftover rice, what can I make out of it?"],
         ["Can I make lunch for two people with this?"],
+        ["Some good dessert with leftover cake"]
     ],
     cache_examples=False,
     retry_btn=None,
     undo_btn="Delete Previous",
     clear_btn="Clear",
 )
 if __name__ == "__main__":