Spaces:
Sleeping
Sleeping
| #Testing and setup suite for API functionality | |
| from api.scraping import scrapePage | |
| from api.analysis import analyseSite, parseImgLis | |
| from api.searchImages import performQueries | |
| import nltk | |
def setup():
    """Download the NLTK corpora/models the analysis pipeline depends on."""
    for resource in ("punkt", "averaged_perceptron_tagger", "wordnet", "stopwords"):
        nltk.download(resource)
#Test data:
#Scraping
urllist = [
    "https://stackoverflow.com/questions/1052772/is-there-a-keyboard-shortcut-to-untab-move-a-block-of-code-to-the-left-in-ec",
    "https://www.gocusdom.com/",
    "https://en.wikipedia.org/wiki/Heron",
    "https://brandlume.com/12-proven-ways-to-make-your-website-stand-out/",
]


def _build_request(url, use_images, use_text):
    """Build a request dict with the settings shared by every test fixture.

    Only the url and the two feature toggles vary between fixtures; the
    remaining keys are identical, so they live here instead of being
    copy-pasted into six literal dicts.
    """
    return {
        "url": url,
        "use_images": use_images,
        "use_text": use_text,
        "num_images": 1,
        "page": 0,
        "num_keywords_text": 10,
        "num_keywords_images": 10,
        "num_query_keywords": 5,
        "result_images": 24,
    }


exampleReq1 = _build_request(urllist[0], False, True)   # text only
exampleReq2 = _build_request(urllist[1], True, True)    # text + images
exampleReq3 = _build_request(urllist[2], True, True)    # text + images
exampleReq4 = _build_request(urllist[3], True, False)   # images only
exampleReq0 = _build_request("invalidurl", True, True)  # malformed url -> scraper error
exampleReq5 = _build_request(urllist[0], False, False)  # both toggles off -> analysis error
errorFreeReqs = [exampleReq1, exampleReq2, exampleReq3, exampleReq4]
def pageScraperTest():
    """Exercise scrapePage on each fixture and return the successful results."""
    # A malformed URL must come back as the scraper's error payload.
    invalid = scrapePage(exampleReq0)
    assert invalid == {"error": "scraping.py: url is not recognized as a valid url."}
    # Text-only request: no image payload, non-empty text.
    textOnly = scrapePage(exampleReq1)
    assert "images" not in textOnly
    assert len(textOnly["text"]) > 0
    # Text + images requests: both payloads must be populated.
    both1 = scrapePage(exampleReq2)
    assert len(both1["images"]) > 1
    assert len(both1["text"]) > 0
    both2 = scrapePage(exampleReq3)
    print(both2)
    assert len(both2["images"]) > 1
    assert len(both2["text"]) > 0
    # Images-only request: no text payload.
    imgOnly = scrapePage(exampleReq4)
    assert len(imgOnly["images"]) > 1
    assert "text" not in imgOnly
    print("Successfully passed scraping suite")
    return [textOnly, both1, both2, imgOnly]
#PART 2: Analysis
# Sample analyser output (before parseImgLis post-processing), kept as
# reference data for the analysis suite.
exampleRetPreParse = {
    "keywords_images": {
        "0": [
            {"keyword": "sign", "score": 0.6898373278547406},
            {"keyword": "symbol", "score": 0.4472937186719514},
            {"keyword": "design", "score": 0.2713939913056557},
            {"keyword": "architecture", "score": 0.2713049676643986},
            {"keyword": "illustration", "score": 0.26296517362059374},
            {"keyword": "success", "score": 0.15815140125121907},
            {"keyword": "night", "score": 0.1436041312950442},
            {"keyword": "icon", "score": 0.14356085864611598},
            {"keyword": "finance", "score": 0.13994914050650978},
            {"keyword": "ideas", "score": 0.13742065469130105},
        ]
    },
    "keywords_text": [
        "web", "design", "website", "cusdom", "business",
        "project", "experience", "user", "college", "startup",
    ],
}
def analysisTest(scraperData):
    """Run analyseSite over previously scraped data and return the results."""
    # With use_images and use_text both disabled the analyser must error out.
    disabled = analyseSite(scraperData[0], exampleReq5)
    assert disabled == {"error": "analysis.py, problem encountered when analysing site data"}
    # Text-only analysis should produce exactly three search queries.
    textOnly = analyseSite(scraperData[0], exampleReq1)
    assert len(textOnly["queries"]) == 3
    outcomes = [textOnly]
    # Remaining error-free cases: analysis must succeed without an error key.
    for data, request in zip(scraperData[1:], (exampleReq2, exampleReq3, exampleReq4)):
        outcome = analyseSite(data, request)
        assert "error" not in outcome
        outcomes.append(outcome)
    print("passed analysis testing suite")
    return outcomes
def queryTest(req):
    """Run performQueries on each of the four query sets; none may error."""
    for index in range(4):
        outcome = performQueries(req[index])
        assert "error" not in outcome
    print("Passed image search suite")
def completeTest(req):
    """Run the full scrape -> analyse -> image-search pipeline for one request.

    At each stage, if the stage reports an error the payload is printed and
    returned immediately. On success the final performQueries result is now
    returned as well (previously the success path implicitly returned None
    while every error path returned a dict — an inconsistent contract; the
    only visible caller ignores the return value, so this is backward
    compatible).
    """
    scrapedData = scrapePage(req)
    if "error" in scrapedData:
        print(scrapedData)
        return scrapedData
    queries = analyseSite(scrapedData, req)
    if "error" in queries:
        print(queries)
        return queries
    res = performQueries(queries)
    if "error" in res:
        print(res)
    return res
if __name__ == '__main__':
    setup()
    print("starting error free requests")
    # Drive the full pipeline for every request that is expected to succeed;
    # completeTest prints any stage's error payload as it happens.
    for req in errorFreeReqs:
        print("requesting", req["url"])
        completeTest(req)
    print("Finished error free requests, if no errors are printed above we are good to go.")
    # Individual suites, kept commented out for targeted debugging:
    #print("completed example test")
    #scraperData = pageScraperTest()
    #Analysis
    #queries = analysisTest(scraperData)
    #queryTest(queries)
    #