Spaces:

abhisheky127
/

Fold_TransactionClassification

Sleeping

App Files Files Community

abhisheky127 committed on Jul 8, 2023

Commit

ed3015b

1 Parent(s): ee3f8bf

adding keyword crawling from google api in the flow

Browse files

Files changed (1) hide show

app.py +30 -25

app.py CHANGED Viewed

@@ -1,6 +1,31 @@
 import gradio as gr
 import re
 from transformers import pipeline
 # Display strings for the app; presumably passed to the Gradio interface
 # (the interface construction is not visible in this hunk — confirm).
 title = "Fold: Contextual Tag Recommendation System"
 description = "powered by bart-large-mnli, made by @abhisheky127"
@@ -8,42 +33,22 @@ description = "powered by bart-large-mnli, made by @abhisheky127"
 # Zero-shot classification pipeline backed by facebook/bart-large-mnli.
 # Built once at module import so every zero_shot() call reuses it.
 classifier = pipeline("zero-shot-classification",
                       model="facebook/bart-large-mnli")
 # Classify a transaction description against user-supplied candidate labels.
 #   doc:        raw transaction text; it is run through preprocess() first.
 #   candidates: comma-separated label string; each label is whitespace-stripped.
 # Returns a dict mapping each label to the score reported by the classifier.
 # NOTE(review): blank entries (e.g. from a trailing comma) are kept as empty
 # labels and sent to the classifier — confirm whether they should be dropped.
 def zero_shot(doc, candidates):
     given_labels = candidates.split(",")
     given_labels = list(map(str.strip, given_labels))
     doc = preprocess(doc)
     # Log the cleaned text that is actually sent to the classifier.
     print(doc)
     dictionary = classifier(doc, given_labels)
     # NOTE(review): assumes 'labels' and 'scores' are parallel lists — confirm
     # against the transformers zero-shot pipeline output format.
     labels = dictionary['labels']
     scores = dictionary['scores']
     return dict(zip(labels, scores))
-#define a function to preprocess transaction query
-# def preprocess(transaction):
-#     pattern = r'([A-Za-z0-9\s]+)(?:/| |$)'
-#     match = re.search(pattern, transaction)
-#     if match:
-#         return match.group(1).strip()
-#     return None
-# def preprocess(transaction):
-#     remove_words = ["pos", "mps", "bil", "onl"]
-#     # Convert to lowercase
-#     transaction = transaction.lower()
-#     # Remove unwanted words
-#     for word in remove_words:
-#         transaction = transaction.replace(word, "")
-#     # Remove special characters and digits
-#     transaction = re.sub(r"[^a-z\s]+", "", transaction)
-#     # Remove extra spaces
-#     transaction = re.sub(r"\s+", " ", transaction).strip()
-#     return transaction
 def preprocess(text):
     # Remove digits
     cleaned_text = re.sub(r'\d', '', text)

 import gradio as gr
 import re
 from transformers import pipeline
+from googlesearch import search
+import requests
+from bs4 import BeautifulSoup
+def get_google_description(keyword):
+    query = keyword
+    results = search(query, num_results=1, lang='en')
+    for result in results:
+        description = get_description_from_url(result)
+        if description:
+            return description
+    return keyword
+def get_description_from_url(url):
+    response = requests.get(url)
+    soup = BeautifulSoup(response.text, 'html.parser')
+    description_tag = soup.find('meta', {'name': 'description'})
+    if description_tag:
+        return description_tag.get('content')
+    return None
 # Display strings for the app; presumably passed to the Gradio interface
 # (the interface construction is not visible in this hunk — confirm).
 title = "Fold: Contextual Tag Recommendation System"
 description = "powered by bart-large-mnli, made by @abhisheky127"
 # Zero-shot classification pipeline backed by facebook/bart-large-mnli.
 # Built once at module import so every zero_shot() call reuses it.
 classifier = pipeline("zero-shot-classification",
                       model="facebook/bart-large-mnli")
 # Classify a transaction description against user-supplied candidate labels.
 #   doc:        raw transaction text; it is run through preprocess() and then
 #               get_google_description() before classification.
 #   candidates: comma-separated label string; each label is whitespace-stripped.
 # Returns a dict mapping each label to the score reported by the classifier.
 # NOTE(review): blank entries (e.g. from a trailing comma) are kept as empty
 # labels and sent to the classifier — confirm whether they should be dropped.
 def zero_shot(doc, candidates):
     given_labels = candidates.split(",")
     given_labels = list(map(str.strip, given_labels))
     doc = preprocess(doc)
     # Swap the cleaned text for a description looked up on the web;
     # get_google_description() hands back its input when nothing is found.
+    doc = get_google_description(doc)
     # Log the text that is actually sent to the classifier.
     print(doc)
     dictionary = classifier(doc, given_labels)
     # NOTE(review): assumes 'labels' and 'scores' are parallel lists — confirm
     # against the transformers zero-shot pipeline output format.
     labels = dictionary['labels']
     scores = dictionary['scores']
     return dict(zip(labels, scores))
 def preprocess(text):
     # Remove digits
     cleaned_text = re.sub(r'\d', '', text)