Commit: "Update nlp.py" — diff of nlp.py (changed file)
|
@@ -7,28 +7,41 @@ from keras.layers import LSTM, Embedding, Dense
|
|
| 7 |
import numpy as np
|
| 8 |
import random
|
| 9 |
|
| 10 |
-
# List of predefined topics
|
| 11 |
topics = {
|
| 12 |
-
"Technology":
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
"
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
}
|
| 24 |
|
| 25 |
# Randomly select a topic
|
| 26 |
selected_topic = random.choice(list(topics.keys()))
|
| 27 |
print(f"Selected topic: {selected_topic}")
|
| 28 |
|
| 29 |
-
# Fetch data from predefined URLs
|
| 30 |
-
def fetch_data(url):
|
| 31 |
-
|
|
|
|
| 32 |
soup = BeautifulSoup(response.content, 'html.parser')
|
| 33 |
return soup.get_text()
|
| 34 |
|
|
@@ -53,10 +66,11 @@ def solve_math_problem():
|
|
| 53 |
# Load data or generate math problem
|
| 54 |
if selected_topic != "Math":
|
| 55 |
data = ""
|
| 56 |
-
for url in topics[selected_topic]:
|
| 57 |
-
data += fetch_data(url)
|
| 58 |
else:
|
| 59 |
-
data
|
|
|
|
| 60 |
|
| 61 |
# Tokenization
|
| 62 |
tokenizer = Tokenizer()
|
|
@@ -102,7 +116,7 @@ def generate_text(model, tokenizer, max_sequence_len, input_text, num_words):
|
|
| 102 |
return input_text
|
| 103 |
|
| 104 |
# Get initial input text and number of words to generate
|
| 105 |
-
initial_input_text = "
|
| 106 |
num_words = 100 # Number of words to generate
|
| 107 |
|
| 108 |
# Generate text
|
|
|
|
import numpy as np
import random

# Predefined topics, each mapping to a search query and the sites to scrape.
# "Math" is special-cased downstream: no URLs are fetched and a math problem
# is generated instead, so its query/urls are intentionally empty.
topics = {
    "Technology": {
        "query": "latest technology news",
        "urls": [
            "https://geeksforgeeks.org",
            "https://theverge.com",
        ],
    },
    "Science": {
        "query": "latest science discoveries",
        "urls": [
            "https://oercommons.org/hubs/NSDL",
        ],
    },
    "History": {
        "query": "historical events",
        "urls": [
            "https://history.com",
        ],
    },
    "Math": {
        "query": "",
        "urls": [],
    },
}

# Randomly select a topic for this run; the rest of the script branches on it.
selected_topic = random.choice(list(topics.keys()))
print(f"Selected topic: {selected_topic}")
| 40 |
|
| 41 |
+
# Fetch data from predefined URLs with queries
|
| 42 |
+
def fetch_data(url, query):
|
| 43 |
+
search_url = f"{url}/search?q={query}"
|
| 44 |
+
response = requests.get(search_url)
|
| 45 |
soup = BeautifulSoup(response.content, 'html.parser')
|
| 46 |
return soup.get_text()
|
| 47 |
|
|
|
|
# Build the text corpus for the selected topic. "Math" gets a stub corpus
# (its problem is generated elsewhere); every other topic is scraped from
# its configured URLs using the topic's search query.
if selected_topic == "Math":
    # Create a dummy data string for tokenization and sequence generation
    data = "This is a sample text for math topic."
else:
    topic_info = topics[selected_topic]
    data = "".join(
        fetch_data(site, topic_info["query"]) for site in topic_info["urls"]
    )
|
| 75 |
# Tokenization
|
| 76 |
tokenizer = Tokenizer()
|
|
|
|
| 116 |
return input_text
|
| 117 |
|
| 118 |
# Get initial input text and number of words to generate
|
| 119 |
+
initial_input_text = "This is a generated text"
|
| 120 |
num_words = 100 # Number of words to generate
|
| 121 |
|
| 122 |
# Generate text
|