Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,12 +10,14 @@ from nltk.corpus import stopwords
|
|
| 10 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 11 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 12 |
import unicodedata
|
|
|
|
| 13 |
|
| 14 |
nltk.download('punkt')
|
| 15 |
nltk.download('averaged_perceptron_tagger')
|
| 16 |
nltk.download('stopwords')
|
| 17 |
|
| 18 |
|
|
|
|
| 19 |
def get_paragraph(row, index):
|
| 20 |
ans = ''
|
| 21 |
for x in row[index]:
|
|
@@ -237,33 +239,52 @@ def get_article_recommendations(user_input):
|
|
| 237 |
return recommendations
|
| 238 |
|
| 239 |
|
| 240 |
-
def
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
return links
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
def validation(user_input):
|
| 256 |
-
user_words = set(user_input.lower().split())
|
| 257 |
-
if any(word not in stop_words for word in user_words):
|
| 258 |
-
return "valid"
|
| 259 |
else:
|
| 260 |
-
|
| 261 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
|
| 263 |
validation_interface = gradio.Interface(
|
| 264 |
fn=validation,
|
| 265 |
inputs="text",
|
| 266 |
-
outputs=gradio.outputs.
|
| 267 |
title="Validation API - Testing API of ScholarSync",
|
| 268 |
description="API to validate user input"
|
| 269 |
)
|
|
|
|
| 10 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 11 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 12 |
import unicodedata
|
| 13 |
+
import json
|
| 14 |
|
| 15 |
nltk.download('punkt')
|
| 16 |
nltk.download('averaged_perceptron_tagger')
|
| 17 |
nltk.download('stopwords')
|
| 18 |
|
| 19 |
|
| 20 |
+
|
| 21 |
def get_paragraph(row, index):
|
| 22 |
ans = ''
|
| 23 |
for x in row[index]:
|
|
|
|
| 239 |
return recommendations
|
| 240 |
|
| 241 |
|
| 242 |
+
def validation(text):
    """Validate free-text user input for the recommender.

    The input counts as valid when it contains at least one adjective
    (POS tag JJ*) or noun (POS tag NN*). Returns a pretty-printed JSON
    string: {"validation": "invalid"} on failure, or
    {"validation": "valid", "sentence": "<adjectives> <nouns>"} on success,
    where the sentence is the space-joined adjectives followed by the
    space-joined nouns.
    """
    # Tokenize and part-of-speech tag in one pass.
    tagged = pos_tag(word_tokenize(text))

    # Penn Treebank tags: JJ/JJR/JJS are adjectives, NN/NNS/NNP/NNPS are nouns.
    adjectives = [tok for tok, tag in tagged if tag.startswith('JJ')]
    nouns = [tok for tok, tag in tagged if tag.startswith('NN')]

    if not adjectives and not nouns:
        outcome = {'validation': 'invalid'}
    else:
        # Rebuild a condensed "sentence" of just the content words.
        outcome = {
            'validation': 'valid',
            'sentence': f"{' '.join(adjectives)} {' '.join(nouns)}",
        }

    return json.dumps(outcome, indent=4)
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
def get_links(user_input):
    """Return a list of recommended-article links for *user_input*.

    The input is first screened by validation(); if it contains no
    noun/adjective content the function returns []. Otherwise the
    condensed sentence is passed to get_article_recommendations() and
    each recommendation is mapped to a dict with the article's title,
    url, article_id and journal_id (looked up in journal_main).
    """
    # BUG FIX: validation() returns a JSON *string* (json.dumps), so the
    # original `check['validation']` indexed a str with a str key and
    # raised TypeError. Decode the payload before key access.
    check = json.loads(validation(user_input))
    if check['validation'] != 'valid':
        return []

    links = []
    # Renamed the first tuple element: the original name shadowed the
    # imported sklearn cosine_similarity function.
    for similarity_score, article_id, journal_id in get_article_recommendations(check['sentence']):
        # Column 0 is assumed to hold the title and column 1 the URL —
        # TODO confirm against the article_df construction.
        article_df = journal_main['article_df'][journal_id]
        links.append({
            "title": article_df.iloc[article_id, 0],
            "url": article_df.iloc[article_id, 1],
            "article_id": int(article_id),
            "journal_id": int(journal_id),
        })
    return links
|
| 281 |
+
|
| 282 |
+
|
| 283 |
|
| 284 |
# Gradio endpoint exposing validation() on its own, so the input check
# can be exercised independently of the recommendation pipeline.
# validation() returns a JSON string, rendered by the JSON output widget.
validation_interface = gradio.Interface(
    fn=validation,
    inputs="text",
    # NOTE(review): gradio.outputs.JSON() is the legacy (pre-3.x) output
    # API — confirm the pinned gradio version still supports it.
    outputs=gradio.outputs.JSON(),
    title="Validation API - Testing API of ScholarSync",
    description="API to validate user input"
)
|