electric-otter committed on
Commit
abd995d
·
verified ·
1 Parent(s): 6d9c2d9

Update nlp.py

Browse files
Files changed (1) hide show
  1. nlp.py +33 -19
nlp.py CHANGED
@@ -7,28 +7,41 @@ from keras.layers import LSTM, Embedding, Dense
7
  import numpy as np
8
  import random
9
 
10
- # List of predefined topics and their corresponding URLs
11
  topics = {
12
- "Technology": [
13
- f"https://geeksforgeeks.org{query}",
14
- f"https://theverge.com/tech{query}",
15
- ],
16
- "Science": [
17
- f"https://oercommons.org/hubs/NSDL{query}",
18
- ],
19
- "History": [
20
- f"https://history.com{query}",
21
- ],
22
- "Math": []
 
 
 
 
 
 
 
 
 
 
 
 
23
  }
24
 
25
  # Randomly select a topic
26
  selected_topic = random.choice(list(topics.keys()))
27
  print(f"Selected topic: {selected_topic}")
28
 
29
- # Fetch data from predefined URLs
30
- def fetch_data(url):
31
- response = requests.get(url)
 
32
  soup = BeautifulSoup(response.content, 'html.parser')
33
  return soup.get_text()
34
 
@@ -53,10 +66,11 @@ def solve_math_problem():
53
  # Load data or generate math problem
54
  if selected_topic != "Math":
55
  data = ""
56
- for url in topics[selected_topic]:
57
- data += fetch_data(url)
58
  else:
59
- data = "math topic"
 
60
 
61
  # Tokenization
62
  tokenizer = Tokenizer()
@@ -102,7 +116,7 @@ def generate_text(model, tokenizer, max_sequence_len, input_text, num_words):
102
  return input_text
103
 
104
  # Get initial input text and number of words to generate
105
- initial_input_text = "LiteMachine: "
106
  num_words = 100 # Number of words to generate
107
 
108
  # Generate text
 
7
  import numpy as np
8
  import random
9
 
10
+ # List of predefined topics, their queries, and corresponding URLs
11
  topics = {
12
+ "Technology": {
13
+ "query": "latest technology news",
14
+ "urls": [
15
+ "https://geeksforgeeks.org",
16
+ "https://theverge.com",
17
+ ]
18
+ },
19
+ "Science": {
20
+ "query": "latest science discoveries",
21
+ "urls": [
22
+ "https://oercommons.org/hubs/NSDL",
23
+ ]
24
+ },
25
+ "History": {
26
+ "query": "historical events",
27
+ "urls": [
28
+ "https://history.com",
29
+ ]
30
+ },
31
+ "Math": {
32
+ "query": "",
33
+ "urls": []
34
+ }
35
  }
36
 
37
  # Randomly select a topic
38
  selected_topic = random.choice(list(topics.keys()))
39
  print(f"Selected topic: {selected_topic}")
40
 
41
+ # Fetch data from predefined URLs with queries
42
+ def fetch_data(url, query):
43
+ search_url = f"{url}/search?q={query}"
44
+ response = requests.get(search_url)
45
  soup = BeautifulSoup(response.content, 'html.parser')
46
  return soup.get_text()
47
 
 
66
  # Load data or generate math problem
67
  if selected_topic != "Math":
68
  data = ""
69
+ for url in topics[selected_topic]["urls"]:
70
+ data += fetch_data(url, topics[selected_topic]["query"])
71
  else:
72
+ # Create a dummy data string for tokenization and sequence generation
73
+ data = "This is a sample text for math topic."
74
 
75
  # Tokenization
76
  tokenizer = Tokenizer()
 
116
  return input_text
117
 
118
  # Get initial input text and number of words to generate
119
+ initial_input_text = "This is a generated text"
120
  num_words = 100 # Number of words to generate
121
 
122
  # Generate text