Spaces:

RandomNameAnd6
/

DharGPT-Demo

Sleeping

App Files Files Community

RandomNameAnd6 committed on Jun 14, 2024

Commit

7e56099

verified ·

1 Parent(s): 8463291

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -2

app.py CHANGED Viewed

@@ -19,11 +19,70 @@ def generate_text(prompt):
 with open('dhar_mann_titles.txt', 'r') as file:
     dhar_mann_titles = file.readlines()
-# Function to generate an AI title (dummy implementation)
 def generate_ai_title():
     inputs = tokenizer(["<|startoftext|>"]*1, return_tensors = "pt")
     outputs = model.generate(**inputs, max_new_tokens=50, use_cache=True, temperature=0.85, do_sample=True)
-    return (tokenizer.batch_decode(outputs)[0])[15:-13]
 # Function to check user's answer and update score
 def check_answer(user_choice, real_index, score):

 with open('dhar_mann_titles.txt', 'r') as file:
     dhar_mann_titles = file.readlines()
def levenshtein_distance(s1, s2):
    """
    Compute the Levenshtein distance between two strings.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn one string into the other.
    Only two rows of the dynamic-programming table are kept at a time.

    Parameters:
    - s1 (str): The first string.
    - s2 (str): The second string.
    Returns:
    - int: The Levenshtein distance between the two strings.
    """
    # The distance is symmetric, so keep the shorter string on the inner
    # loop; each row then holds only len(shorter) + 1 entries.
    if len(s1) < len(s2):
        s1, s2 = s2, s1
    # Turning a string into the empty string costs one deletion per character.
    if not s2:
        return len(s1)
    # Row 0: cost of building each prefix of s2 from the empty string.
    previous_row = list(range(len(s2) + 1))
    for i, c1 in enumerate(s1, start=1):
        # First cell: cost of deleting the first i characters of s1.
        current_row = [i]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            # The bool adds 0 when the characters match and 1 otherwise.
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row
    return previous_row[-1]
def string_similarity_index(original_text, comparison_text, threshold=0.6):
    """
    Decide whether two strings are similar, based on Levenshtein distance.

    The similarity score is ``1 - distance / max(len(a), len(b))``, so it is
    1.0 for identical strings and 0.0 when every character has to change.

    Parameters:
    - original_text (str): The original text.
    - comparison_text (str): The text to compare for similarity.
    - threshold (float): The non-original threshold score (0 to 1).
    Returns:
    - bool: True if the similarity score is greater than or equal to the
      threshold, False otherwise.
    """
    # The longer length is the largest possible edit distance.
    max_distance = max(len(original_text), len(comparison_text))
    if max_distance == 0:
        # Both strings are empty: they are identical (score 1.0), and the
        # normalisation below would otherwise divide by zero.
        return 1.0 >= threshold
    # Calculate the Levenshtein distance
    distance = levenshtein_distance(original_text, comparison_text)
    # Normalise the distance into a similarity score
    similarity_score = 1 - distance / max_distance
    # Compare the similarity score to the threshold
    return similarity_score >= threshold
# Function to generate an AI title
def generate_ai_title(max_attempts=10):
    """
    Generate a title with the model, retrying when it is too close to a real one.

    A candidate is sampled from the module-level ``model``/``tokenizer`` and
    compared against every entry of ``dhar_mann_titles`` with
    ``string_similarity_index``. If it is similar to any of them, a new
    candidate is sampled.

    Parameters:
    - max_attempts (int): Upper bound on the number of candidates sampled
      (values below 1 are treated as 1).
    Returns:
    - str: The first candidate that is not similar to any known title, or
      the last candidate sampled if every attempt was rejected.
    """
    # Strip newlines once, outside the retry loop, and skip blank lines so
    # an empty entry is never compared against a candidate.
    known_titles = [title.strip() for title in dhar_mann_titles]
    known_titles = [title for title in known_titles if title]

    generated_title = ""
    # A bounded loop replaces unbounded self-recursion, which could exhaust
    # the interpreter's recursion limit.
    for _ in range(max(1, max_attempts)):
        inputs = tokenizer(["<|startoftext|>"]*1, return_tensors = "pt")
        outputs = model.generate(**inputs, max_new_tokens=50, use_cache=True, temperature=0.85, do_sample=True)
        # Presumably drops the 15-character "<|startoftext|>" prefix and a
        # 13-character end-of-text marker -- TODO confirm against the tokenizer.
        generated_title = (tokenizer.batch_decode(outputs)[0])[15:-13]
        # Compare the generated candidate itself against each known title.
        if not any(string_similarity_index(generated_title, title) for title in known_titles):
            return generated_title
    # Best effort: every attempt resembled a known title.
    return generated_title
 # Function to check user's answer and update score
 def check_answer(user_choice, real_index, score):