# Testing and setup suite for API functionality
from api.scraping import scrapePage
from api.analysis import analyseSite, parseImgLis
from api.searchImages import performQueries
import nltk


def setup():
    """Download the NLTK data packages the analysis pipeline depends on.

    Safe to call repeatedly: nltk.download is a no-op for data that is
    already present.
    """
    nltk.download('punkt')
    nltk.download('averaged_perceptron_tagger')
    nltk.download('wordnet')
    nltk.download('stopwords')


# Test data:
# Scraping
urllist = [
    "https://stackoverflow.com/questions/1052772/is-there-a-keyboard-shortcut-to-untab-move-a-block-of-code-to-the-left-in-ec",
    "https://www.gocusdom.com/",
    "https://en.wikipedia.org/wiki/Heron",
    "https://brandlume.com/12-proven-ways-to-make-your-website-stand-out/",
]


def _makeReq(url, use_images, use_text):
    """Build one pipeline request dict.

    Only the URL and the two use_* switches vary between the example
    requests in this suite; every other field is the shared default.
    """
    return {
        "url": url,
        "use_images": use_images,
        "use_text": use_text,
        "num_images": 1,
        "page": 0,
        "num_keywords_text": 10,
        "num_keywords_images": 10,
        "num_query_keywords": 5,
        "result_images": 24,
    }


exampleReq1 = _makeReq(urllist[0], False, True)
exampleReq2 = _makeReq(urllist[1], True, True)
exampleReq3 = _makeReq(urllist[2], True, True)
exampleReq4 = _makeReq(urllist[3], True, False)
# Deliberately malformed URL: exercises the scraper's validation error branch.
exampleReq0 = _makeReq("invalidurl", True, True)
# Both switches off: exercises the analyser's "nothing to analyse" error branch.
exampleReq5 = _makeReq(urllist[0], False, False)

errorFreeReqs = [exampleReq1, exampleReq2, exampleReq3, exampleReq4]


def pageScraperTest():
    """Exercise scrapePage across the example requests.

    Checks the invalid-URL error dict, that use_images/use_text switches
    control which keys appear in the result, and that enabled sections
    are non-empty. Returns the four successful scrape results so the
    analysis suite can reuse them.
    """
    res = scrapePage(exampleReq0)
    # Error message must match scraping.py exactly.
    assert res == {"error": "scraping.py: url is not recognized as a valid url."}
    res1 = scrapePage(exampleReq1)
    assert "images" not in res1  # use_images was False
    assert len(res1['text']) > 0
    res2 = scrapePage(exampleReq2)
    assert len(res2["images"]) > 1
    assert len(res2['text']) > 0
    res3 = scrapePage(exampleReq3)
    print(res3)
    assert len(res3["images"]) > 1
    assert len(res3['text']) > 0
    res4 = scrapePage(exampleReq4)
    assert len(res4["images"]) > 1
    assert "text" not in res4  # use_text was False
    print("Successfully passed scraping suite")
    return [res1, res2, res3, res4]


# PART 2: Analysis
# NOTE(review): reference sample of analyseSite output before image-list
# parsing; kept for manual inspection, not consumed by the tests below.
exampleRetPreParse = {'keywords_images': {'0': [{'keyword': 'sign', 'score': 0.6898373278547406}, {'keyword': 'symbol', 'score': 0.4472937186719514}, {'keyword': 'design', 'score': 0.2713939913056557}, {'keyword': 'architecture', 'score': 0.2713049676643986}, {'keyword': 'illustration', 'score': 0.26296517362059374}, {'keyword': 'success', 'score': 0.15815140125121907}, {'keyword': 'night', 'score': 0.1436041312950442}, {'keyword': 'icon', 'score': 0.14356085864611598}, {'keyword': 'finance', 'score': 0.13994914050650978}, {'keyword': 'ideas', 'score': 0.13742065469130105}]}, 'keywords_text': ['web', 'design', 'website', 'cusdom', 'business', 'project', 'experience', 'user', 'college', 'startup']}


def analysisTest(scraperData):
    """Exercise analyseSite on the scrape results from pageScraperTest.

    scraperData: list of four scrape-result dicts, in the order produced
    by pageScraperTest. Returns the four successful analysis results for
    the query suite.
    """
    # Data if use_image and use_text are set to 0
    res = analyseSite(scraperData[0], exampleReq5)
    # Error message must match analysis.py exactly.
    assert res == {"error": "analysis.py, problem encountered when analysing site data"}
    res2 = analyseSite(scraperData[0], exampleReq1)
    assert len(res2["queries"]) == 3
    res3 = analyseSite(scraperData[1], exampleReq2)
    assert "error" not in res3
    res4 = analyseSite(scraperData[2], exampleReq3)
    assert "error" not in res4
    res5 = analyseSite(scraperData[3], exampleReq4)
    assert "error" not in res5
    print("passed analysis testing suite")
    return [res2, res3, res4, res5]


def queryTest(req):
    """Run performQueries on each analysis result and check for errors.

    req: list of four analysis-result dicts from analysisTest.
    """
    res1 = performQueries(req[0])
    assert "error" not in res1
    res2 = performQueries(req[1])
    assert "error" not in res2
    res3 = performQueries(req[2])
    assert "error" not in res3
    res4 = performQueries(req[3])
    assert "error" not in res4
    print("Passed image search suite")


def completeTest(req):
    """Run the full scrape -> analyse -> image-search pipeline for one request.

    Prints and returns the first error dict a stage produces. On full
    success, returns the image-search result (previously the success
    result was silently discarded and the caller got None).
    """
    scrapedData = scrapePage(req)
    if "error" in scrapedData:
        print(scrapedData)
        return scrapedData
    queries = analyseSite(scrapedData, req)
    if "error" in queries:
        print(queries)
        return queries
    res = performQueries(queries)
    if "error" in res:
        print(res)
    return res


if __name__ == '__main__':
    setup()
    print("starting error free requests")
    for req in errorFreeReqs:
        print("requesting", req["url"])
        completeTest(req)
    print("Finished error free requests, if no errors are printed above we are good to go.")
    # print("completed example test")
    # scraperData = pageScraperTest()
    # Analysis
    # queries = analysisTest(scraperData)
    # queryTest(queries)