# sneakpic/api/apitest.py
# Author: efraimdahl — "added stopword download" (commit 8e72555)
#Testing and setup suite for API functionality
from api.scraping import scrapePage
from api.analysis import analyseSite, parseImgLis
from api.searchImages import performQueries
import nltk
def setup():
    """Download the NLTK corpora and models required by the analysis pipeline."""
    for resource in ('punkt', 'averaged_perceptron_tagger', 'wordnet', 'stopwords'):
        nltk.download(resource)
#Test data:
#Scraping
urllist = ["https://stackoverflow.com/questions/1052772/is-there-a-keyboard-shortcut-to-untab-move-a-block-of-code-to-the-left-in-ec","https://www.gocusdom.com/","https://en.wikipedia.org/wiki/Heron","https://brandlume.com/12-proven-ways-to-make-your-website-stand-out/"]

def _make_req(url, use_images, use_text):
    """Build a standard request dict; only url/use_images/use_text vary between cases."""
    return {"url": url, "use_images": use_images, "use_text": use_text,
            "num_images": 1, "page": 0, "num_keywords_text": 10,
            "num_keywords_images": 10, "num_query_keywords": 5,
            "result_images": 24}

exampleReq1 = _make_req(urllist[0], False, True)   # text only
exampleReq2 = _make_req(urllist[1], True, True)    # text + images
exampleReq3 = _make_req(urllist[2], True, True)    # text + images
exampleReq4 = _make_req(urllist[3], True, False)   # images only
exampleReq0 = _make_req("invalidurl", True, True)  # invalid URL -> scraper error
exampleReq5 = _make_req(urllist[0], False, False)  # neither source -> analysis error
errorFreeReqs = [exampleReq1, exampleReq2, exampleReq3, exampleReq4]
def pageScraperTest():
    """Smoke-test scrapePage against the canned example requests.

    Returns the four error-free scrape results so they can be fed to
    analysisTest. Raises AssertionError on any unexpected payload.
    """
    # Invalid URL must yield the scraper's error dict, not raise.
    res = scrapePage(exampleReq0)
    assert(res=={"error": "scraping.py: url is not recognized as a valid url."})
    # Text-only request: no "images" key, non-empty text.
    res1 = scrapePage(exampleReq1)
    assert("images" not in res1)
    assert(len(res1['text'])>0)
    # Text + images requests: both payloads present.
    res2 = scrapePage(exampleReq2)
    assert(len(res2["images"])>1)
    assert(len(res2['text'])>0)
    res3 = scrapePage(exampleReq3)
    assert(len(res3["images"])>1)
    assert(len(res3['text'])>0)
    # Images-only request: no "text" key.
    res4 = scrapePage(exampleReq4)
    assert(len(res4["images"])>1)
    assert("text" not in res4)
    print("Successfully passed scraping suite")
    return [res1,res2,res3,res4]
#PART 2: Analysis
# Canned analysis output: per-image keyword/score pairs keyed by image index,
# plus a flat list of text keywords. Presumably the pre-parseImgLis shape
# (name suggests it; parseImgLis is imported above) -- TODO confirm, the
# tests that would use it are currently commented out at the bottom.
exampleRetPreParse = {'keywords_images': {'0': [{'keyword': 'sign', 'score': 0.6898373278547406}, {'keyword': 'symbol', 'score': 0.4472937186719514}, {'keyword': 'design', 'score': 0.2713939913056557}, {'keyword': 'architecture', 'score': 0.2713049676643986}, {'keyword': 'illustration', 'score': 0.26296517362059374}, {'keyword': 'success', 'score': 0.15815140125121907}, {'keyword': 'night', 'score': 0.1436041312950442}, {'keyword': 'icon', 'score': 0.14356085864611598}, {'keyword': 'finance', 'score': 0.13994914050650978}, {'keyword': 'ideas', 'score': 0.13742065469130105}]}, 'keywords_text': ['web', 'design', 'website', 'cusdom', 'business',
'project', 'experience', 'user', 'college', 'startup']}
def analysisTest(scraperData):
    """Exercise analyseSite on the scraped data from pageScraperTest.

    Checks the deliberate-failure case first, then the error-free cases,
    and returns the four successful analysis results.
    """
    # With both use_images and use_text disabled, analysis must fail.
    failure = analyseSite(scraperData[0], exampleReq5)
    assert failure == {"error": "analysis.py, problem encountered when analysing site data"}
    # Text-only analysis should still produce exactly three queries.
    textOnly = analyseSite(scraperData[0], exampleReq1)
    assert len(textOnly["queries"]) == 3
    outputs = [textOnly]
    # Remaining cases just have to come back error-free.
    for siteData, request in zip(scraperData[1:], (exampleReq2, exampleReq3, exampleReq4)):
        outcome = analyseSite(siteData, request)
        assert "error" not in outcome
        outputs.append(outcome)
    print("passed analysis testing suite")
    return outputs
def queryTest(req):
    """Run performQueries on each of the four prepared query sets and
    assert that none of them comes back with an error."""
    for index in range(4):
        outcome = performQueries(req[index])
        assert "error" not in outcome
    print("Passed image search suite")
def completeTest(req):
    """Run the full pipeline (scrape -> analyse -> image search) for one request.

    On failure at any stage, prints and returns that stage's error dict.
    On success, returns the final image-search result (previously the
    function fell off the end and discarded it, returning None).
    """
    scrapedData = scrapePage(req)
    if "error" in scrapedData:
        print(scrapedData)
        return scrapedData
    queries = analyseSite(scrapedData, req)
    if "error" in queries:
        print(queries)
        return queries
    res = performQueries(queries)
    if "error" in res:
        print(res)
    return res
if __name__ == '__main__':
    # Fetch NLTK resources, then push every error-free request through the
    # whole pipeline; any stage errors are printed by completeTest itself.
    setup()
    print("starting error free requests")
    for request in errorFreeReqs:
        print("requesting", request["url"])
        completeTest(request)
    print("Finished error free requests, if no errors are printed above we are good to go.")
    # For stage-by-stage checks, chain the individual suites instead:
    # pageScraperTest -> analysisTest -> queryTest.