Spaces:

AEUPH
/

FUTURE-SELF-CONVERSATION

Sleeping

App Files Community

AEUPH committed on Mar 11, 2025

Commit

62c045c

verified ·

1 Parent(s): 1df195d

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -31

app.py CHANGED Viewed

@@ -71,7 +71,7 @@ class TemporalPredictionSystem:
         self.self_model_accuracy = 0.2  # Starts low, improves with conversation
         self.model_growth_rate = 0
         self.total_interactions = 0
         # Response generators
         self.response_templates = [
             "Based on your current trajectory, I see {{future}}.",
@@ -108,38 +108,53 @@ class TemporalPredictionSystem:
         self.fetch_quotes_data()
     def fetch_quotes_data(self):
-        """Fetch quotes data from Hugging Face dataset API"""
-        try:
-            response = requests.get(
-                "https://datasets-server.huggingface.co/rows?dataset=Abirate%2Fenglish_quotes&config=default&split=train&offset=0&length=100"
-            )
-            data = response.json()
-            if data and "rows" in data:
-                for row in data["rows"]:
-                    if row and "row" in row and "quote" in row["row"]:
-                        quote = row["row"]["quote"]
-                        self.quotes_data.append(quote)
-                        words = [w for w in re.sub(r'[^\w\s]', '', quote.lower()).split() if len(w) > 3]
-                        self.word_corpus = list(set(self.word_corpus + words))
-                        if "tags" in row["row"]:
-                            tags = [row["row"]["tags"]] if not isinstance(row["row"]["tags"], list) else row["row"]["tags"]
-                            self.tags_data.extend(tags)
-                            tag_words = [tag.lower() for tag in tags if len(tag) > 3]
-                            self.word_corpus = list(set(self.word_corpus + tag_words))
-                print(f"Loaded {len(self.quotes_data)} quotes and {len(self.tags_data)} tags")
-                print(f"Word corpus expanded to {len(self.word_corpus)} words")
-                for word in self.word_corpus:
-                    self.topic_extractor[word] = 1
-                    self.topic_relations[word] = []
-                self.build_initial_markov_chain()
-        except Exception as e:
-            print(f"Error fetching quotes data: {e}")
             self.build_initial_markov_chain()
-        self.initialize()
     def build_initial_markov_chain(self):
         """Build a Markov chain from quotes and corpus data"""

         self.self_model_accuracy = 0.2  # Starts low, improves with conversation
         self.model_growth_rate = 0
         self.total_interactions = 0
+        self.quote_offset = 0
         # Response generators
         self.response_templates = [
             "Based on your current trajectory, I see {{future}}.",
         self.fetch_quotes_data()
     def fetch_quotes_data(self):
+    """Fetch quotes data from HuggingFace dataset API with dynamic offset for continuous expansion."""
+    try:
+        url = f"https://datasets-server.huggingface.co/rows?dataset=Abirate%2Fenglish_quotes&config=default&split=train&offset={self.quote_offset}&length=100"
+        response = requests.get(url)
+        data = response.json()
+        if data and "rows" in data:
+            for row in data["rows"]:
+                if row and "row" in row and "quote" in row["row"]:
+                    quote = row["row"]["quote"]
+                    self.quotes_data.append(quote)
+                    # Process quote into words for corpus
+                    words = [w for w in re.sub(r'[^\w\s]', '', quote.lower()).split() if len(w) > 3]
+                    self.word_corpus = list(set(self.word_corpus + words))
+                    # Extract tags if available
+                    if "tags" in row["row"]:
+                        tags = [row["row"]["tags"]] if not isinstance(row["row"]["tags"], list) else row["row"]["tags"]
+                        self.tags_data.extend(tags)
+                        # Add tags to corpus
+                        tag_words = [tag.lower() for tag in tags if len(tag) > 3]
+                        self.word_corpus = list(set(self.word_corpus + tag_words))
+            print(f"Loaded {len(self.quotes_data)} quotes and {len(self.tags_data)} tags")
+            print(f"Word corpus expanded to {len(self.word_corpus)} words")
+            # Initialize topic extractor with corpus
+            for word in self.word_corpus:
+                self.topic_extractor[word] = 1
+                self.topic_relations[word] = []
+            # Build Markov chain
             self.build_initial_markov_chain()
+        # Increment the offset for next iteration to load additional quotes
+        self.quote_offset += 100
+    except Exception as e:
+        print(f"Error fetching quotes data: {e}")
+        # Build the Markov chain with default corpus if fetch fails
+        self.build_initial_markov_chain()
+    # Initialize the system (or reinitialize if desired)
+    self.initialize()
     def build_initial_markov_chain(self):
         """Build a Markov chain from quotes and corpus data"""