Sentinel-AI-Web-Search-Test-v2-Testing-Score

Build error

App Files Files Community

Shreyas094 committed on Jun 20, 2024

Commit

8f4e927

verified ·

1 Parent(s): 664e897

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -100

app.py CHANGED Viewed

@@ -1,10 +1,7 @@
-import random
-import requests
-from bs4 import BeautifulSoup
-from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 from huggingface_hub import login
-import os
 # Directly assign your Hugging Face token here
 hf_token = "your_hugging_face_api_token"
@@ -12,94 +9,6 @@ hf_token = "your_hugging_face_api_token"
 # Log in to Hugging Face
 login(token=hf_token)
-# List of user agents
-_useragent_list = [
-    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
-    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
-    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59 Safari/537.36",
-    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59 Safari/537.36",
-    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36",
-    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36",
-]
-# Function to extract visible text from HTML content of a webpage
-def extract_text_from_webpage(html):
-    print("Extracting text from webpage...")
-    soup = BeautifulSoup(html, 'html.parser')
-    for script in soup(["script", "style"]):
-        script.extract()  # Remove scripts and styles
-    text = soup.get_text()
-    lines = (line.strip() for line in text.splitlines())
-    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
-    text = '\n'.join(chunk for chunk in chunks if chunk)
-    print(f"Extracted text length: {len(text)}")
-    return text
-# Function to perform a Google search and retrieve results
-def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_verify=None):
-    """Performs a Google search and returns the results."""
-    print(f"Searching for term: {term}")
-    escaped_term = requests.utils.quote(term)
-    start = 0
-    all_results = []
-    max_chars_per_page = 8000  # Limit the number of characters from each webpage to stay under the token limit
-    with requests.Session() as session:
-        while start < num_results:
-            print(f"Fetching search results starting from: {start}")
-            try:
-                # Choose a random user agent
-                user_agent = random.choice(_useragent_list)
-                headers = {
-                    'User-Agent': user_agent
-                }
-                print(f"Using User-Agent: {headers['User-Agent']}")
-                resp = session.get(
-                    url="https://www.google.com/search",
-                    headers=headers,
-                    params={
-                        "q": term,
-                        "num": num_results - start,
-                        "hl": lang,
-                        "start": start,
-                        "safe": safe,
-                    },
-                    timeout=timeout,
-                    verify=ssl_verify,
-                )
-                resp.raise_for_status()
-            except requests.exceptions.RequestException as e:
-                print(f"Error fetching search results: {e}")
-                break
-            soup = BeautifulSoup(resp.text, "html.parser")
-            result_block = soup.find_all("div", attrs={"class": "g"})
-            if not result_block:
-                print("No more results found.")
-                break
-            for result in result_block:
-                link = result.find("a", href=True)
-                if link:
-                    link = link["href"]
-                    print(f"Found link: {link}")
-                    try:
-                        webpage = session.get(link, headers=headers, timeout=timeout)
-                        webpage.raise_for_status()
-                        visible_text = extract_text_from_webpage(webpage.text)
-                        if len(visible_text) > max_chars_per_page:
-                            visible_text = visible_text[:max_chars_per_page] + "..."
-                        all_results.append({"link": link, "text": visible_text})
-                    except requests.exceptions.RequestException as e:
-                        print(f"Error fetching or processing {link}: {e}")
-                        all_results.append({"link": link, "text": None})
-                else:
-                    print("No link found in result.")
-                    all_results.append({"link": None, "text": None})
-            start += len(result_block)
-    print(f"Total results fetched: {len(all_results)}")
-    return all_results
 # Load the Mixtral-8x7B-Instruct model and tokenizer with authorization header
 model_name = 'mistralai/Mistral-7B-Instruct-v0.3'
 headers = {"Authorization": f"Bearer {hf_token}"}
@@ -117,15 +26,11 @@ model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=hf_token
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
-# Example usage
-search_term = "How did Tesla perform in Q1 2024"
-search_results = google_search(search_term, num_results=3)
-# Combine text from search results to create a prompt
-combined_text = "\n\n".join(result['text'] for result in search_results if result['text'])
 # Tokenize the input text
-inputs = tokenizer(combined_text, return_tensors="pt").to(device)
 # Generate a response
 outputs = model.generate(**inputs, max_length=150, temperature=0.7, top_p=0.9, top_k=50)

+import os
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
 from huggingface_hub import login
 # Directly assign your Hugging Face token here
 hf_token = "your_hugging_face_api_token"
 # Log in to Hugging Face
 login(token=hf_token)
 # Load the Mixtral-8x7B-Instruct model and tokenizer with authorization header
 model_name = 'mistralai/Mistral-7B-Instruct-v0.3'
 headers = {"Authorization": f"Bearer {hf_token}"}
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
+# Example text input
+text_input = "How did Tesla perform in Q1 2024?"
 # Tokenize the input text
+inputs = tokenizer(text_input, return_tensors="pt").to(device)
 # Generate a response
 outputs = model.generate(**inputs, max_length=150, temperature=0.7, top_p=0.9, top_k=50)