Update app.py

app.py CHANGED
@@ -4,8 +4,7 @@ import requests
 from bs4 import BeautifulSoup
 import torch
 import gradio as gr
-from transformers import
-from huggingface_hub import InferenceClient
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import logging
 
 # Set up logging
@@ -14,12 +13,12 @@ logger = logging.getLogger(__name__)
 
 # Define device and load model and tokenizer
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-MODEL_NAME = "
+MODEL_NAME = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 
 # Load model and tokenizer
 try:
     logger.debug("Attempting to load the model and tokenizer")
-    model =
+    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     logger.debug("Model and tokenizer loaded successfully")
 except Exception as e:
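
Note on this hunk: `from_pretrained(MODEL_NAME).to(DEVICE)` loads the checkpoint in full float32, which for Mixtral-8x7B (roughly 47B parameters) is on the order of 180 GB; that will not fit a typical Space. A minimal lighter-weight sketch, assuming half precision and `accelerate`-backed sharding (neither is part of this commit):

# Hypothetical alternative to the loading block above; torch_dtype and
# device_map are assumptions, not part of the commit. device_map requires
# the `accelerate` package to be installed.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "mistralai/Mixtral-8x7B-Instruct-v0.1"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,  # half precision: ~90 GB instead of ~180 GB
    device_map="auto",          # shard across available GPUs and CPU RAM
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)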
@@ -27,9 +26,6 @@ except Exception as e:
     model = None
     tokenizer = None
 
-# Assert to ensure tokenizer is loaded
-assert tokenizer is not None, "Tokenizer failed to load and is None"
-
 # Function to perform a Google search and return the results
 def search(term, num_results=2, lang="en", timeout=5, safe="active", ssl_verify=None):
     logger.debug(f"Starting search for term: {term}")
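
With the import-time assert gone, a failed load no longer kills the Space at startup, but `model` and `tokenizer` may be `None` when a request arrives. A sketch of a runtime guard the handlers could call instead; the helper name is hypothetical:

# Hypothetical runtime guard; not part of this commit.
def require_model():
    if model is None or tokenizer is None:
        raise gr.Error("Model is not loaded; check the Space logs.")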
@@ -94,13 +90,10 @@ def extract_text_from_webpage(html_content):
 # Function to format the prompt for the language model
 def format_prompt(user_prompt, chat_history):
     logger.debug(f"Formatting prompt with user prompt: {user_prompt} and chat history: {chat_history}")
-    prompt = "
+    prompt = ""
     for item in chat_history:
-
-
-        else:
-            prompt += f" [Image] "
-    prompt += f"[INST] {user_prompt} [/INST]"
+        prompt += f"User: {item[0]}\nAssistant: {item[1]}\n"
+    prompt += f"User: {user_prompt}\nAssistant:"
     logger.debug(f"Formatted prompt: {prompt}")
     return prompt
 
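
The new `format_prompt` drops the `[INST]`/image handling in favor of a plain transcript. For illustration, a one-turn history produces:

# Illustrative only: what the new format_prompt returns.
history = [("Hi", "Hello! How can I help?")]
format_prompt("What is Mixtral?", history)
# -> 'User: Hi\nAssistant: Hello! How can I help?\nUser: What is Mixtral?\nAssistant:'

Worth noting: Mixtral-8x7B-Instruct was trained on the `[INST] ... [/INST]` template, so `tokenizer.apply_chat_template` would likely match the trained format more closely than this plain transcript.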
@@ -109,7 +102,6 @@ def model_inference(
     user_prompt,
     chat_history,
     web_search,
-    decoding_strategy,
     temperature,
     max_new_tokens,
     repetition_penalty,
@@ -167,9 +159,9 @@ def model_inference(
 
 # Define Gradio interface components
 max_new_tokens = gr.Slider(
-    minimum=
+    minimum=1,
     maximum=16000,
-    value=
+    value=2048,
     step=64,
     interactive=True,
     label="Maximum number of new tokens to generate",
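
The slider's value ultimately feeds text generation; the exact call inside `model_inference` is not shown in this diff, but a minimal sketch under the assumption that it uses `model.generate`:

# Sketch only; the actual generation call is outside the hunks shown here.
inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
output_ids = model.generate(
    **inputs,
    max_new_tokens=max_new_tokens,       # value from the gr.Slider above
    temperature=temperature,
    repetition_penalty=repetition_penalty,
    do_sample=True,                      # temperature has no effect with greedy decoding
)
reply = tokenizer.decode(output_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)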
@@ -231,7 +223,6 @@ def chat_interface(user_input, history, web_search, decoding_strategy, temperatu
     user_input,
     history,
     web_search,
-    decoding_strategy,
     temperature,
     max_new_tokens,
     repetition_penalty,