# -*- coding: utf-8 -*-
# MarkUp Language
"""Small networking helpers: cached HTTP fetcher, FTP wrapper, TCP listener.

* ``webrap``    - HTTP fetcher with an on-disk offline cache.
* ``owf``       - thin convenience wrapper around ``ftplib.FTP``.
* ``orListen``  - ``select()``-based TCP listener thread; ``_orListen`` is the
  concrete variant used by the command-line script at the bottom.
* ``send1`` / ``mainClient`` - client-side counterparts of the listener.

Several helpers used here without a module prefix (``wfdb``, ``orRemove``,
``jw``, ``vcs``, ``write8``, ``shuttown``, ``except2``, ``hhfl``, ``hhww``,
``arps`` ...) are expected to come from ``from mw import *``.
"""
# Project imports stay first so that the standard-library names imported
# below take precedence over anything ``from mw import *`` may export.
import zipOrZip as oz
import mw
from mw import *
import os
import re
import select
import socket
import sys
import traceback
import urllib.request
from contextlib import closing
from ftplib import FTP
from threading import Thread, Lock, Event, Condition
from urllib.parse import urlparse, quote, quote_plus, unquote, unquote_plus, urlencode

orror = mw.orro3("roml.txt")
urltmpdir = "r:\\TEMP\\"  # directory holding the offline cache files
hp = "https://"           # default scheme prepended to scheme-less URLs


class webrap:
    """HTTP fetcher that keeps downloaded bodies in an offline file cache.

    Instance attributes written by the methods: ``url``, ``code`` (text
    encoding), ``offfile`` (cache path), ``byte`` (last body), ``httpres`` /
    ``res`` (last response object) and ``links``.
    """

    user_agents = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
        'Opera/9.25 (Windows NT 5.1; U; en)',
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
        'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
        # The comma after the next entry was missing, which silently glued
        # the last two user agents into one string.
        'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
        'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
    ]
    # Default request headers; gzip bodies are unpacked by gZip()/_download().
    headers = [
        ('User-agent', user_agents[0]),
        ('Accept-encoding', 'gzip'),
    ]
    # Browser-like header set used by bing().  Only gzip is advertised because
    # it is the only content encoding this class can decode; Host is left to
    # urllib so that the method also works for other sites.
    _browser_headers = [
        ('User-Agent', 'Mozilla/5.0 (Windows NT 5.1; rv:26.0) Gecko/20100101 Firefox/26.0'),
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ('Accept-Language', 'ja,en-us;q=0.7,en;q=0.3'),
        ('Accept-Encoding', 'gzip'),
    ]

    def __init__(self):
        self.user_agent = webrap.user_agents[0]
        self.code = "utf-8"

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _text_encoding(code):
        """Return *code* if it is a usable encoding name, else ``"utf-8"``.

        gtoff() may store ``""`` or the ``bytes`` type in ``self.code``;
        neither can be handed to ``unquote_plus``/``decode``.
        """
        return code if isinstance(code, str) and code else "utf-8"

    @staticmethod
    def _with_scheme(url):
        """Prefix *url* with the default scheme when it has none."""
        return url if "://" in url else hp + url

    def _cache_path(self, url, code):
        """Return the offline cache file path that belongs to *url*."""
        name = mw.esfn(url.replace(hp, ""))
        name = unquote_plus(name, encoding=self._text_encoding(code))
        return urltmpdir + name

    def _download(self, url, headers, data=None):
        """Fetch *url* and return the (gzip-decoded) body as bytes.

        The response object is kept in ``self.res``.  *data*, when given, is
        sent as the request body (which makes urllib issue a POST).
        """
        opener = urllib.request.build_opener()
        opener.addheaders = list(headers)
        self.res = opener.open(url, data)
        body = self.res.read()
        # Header names are case-insensitive, so normalise before the lookup.
        response_headers = {k.lower(): v for k, v in self.res.getheaders()}
        if response_headers.get('content-encoding') == 'gzip':
            body = oz.extgz(body)
        return body

    # ------------------------------------------------------------------
    # cached fetching
    # ------------------------------------------------------------------
    def gZip(self):
        """Read ``self.httpres`` and return its body, un-gzipped if needed."""
        byte = self.httpres.read()
        hs = {f.lower(): v for f, v in self.httpres.getheaders()}
        if hs.get('content-encoding') == 'gzip':
            byte = oz.extgz(byte)
        return byte

    def g0(self):
        """Fetch ``self.url`` from the network (no cache) and return bytes."""
        opener = urllib.request.build_opener()
        opener.addheaders = self.headers
        self.httpres = opener.open(self.url)
        return self.gZip()

    def getCache(self):
        """Look ``self.url`` up in the offline cache.

        Sets ``self.offfile`` to the cache path and adds the default scheme
        to ``self.url`` when it is missing.  Returns the cached bytes (also
        stored in ``self.byte``) or ``None`` when there is no cache file.
        """
        self.offfile = self._cache_path(self.url, self.code)
        self.url = self._with_scheme(self.url)
        if mw.ifexist(self.offfile):
            self.byte = mw.rfdb(self.offfile)
            return self.byte
        return None

    def orwgetof0(self):
        """Return the body of ``self.url``, downloading it on a cache miss."""
        if not self.getCache():
            self.byte = self.g0()
            wfdb(self.offfile, self.byte)
            orRemove(self.offfile, rembat="wget cache remove.bat")
        return self.byte

    @staticmethod
    def orwgetoff(url, post=0):
        """Fetch *url* through the cache with a fresh instance; return bytes.

        *post* is accepted for backward compatibility and is not used.
        """
        fetcher = webrap()
        fetcher.url = url
        return fetcher.orwgetof0()

    def utf(self, url):
        """Fetch *url* through the cache and decode it with ``self.code``."""
        self.url = url
        return self.orwgetof0().decode(self.code)

    def gton(self, *aw):
        """Like gtoff() but always hits the network (``on=True``)."""
        return self.gtoff(*aw, on=True)

    def gtoff(self, url, code="", on=False):
        """Fetch *url* and return its content.

        url:  address to fetch; a ``#fragment`` is dropped.
        code: text encoding used for decoding; ``""`` delegates decoding to
              ``vcs`` and the ``bytes`` type returns the raw body.
        on:   when true bypass the offline cache.
        """
        url = url.split("#")[0]
        # '&' must stay unescaped, otherwise every query string with more
        # than one parameter is turned into a single mangled value.
        self.url = quote(unquote_plus(url), safe=":/?=&")
        self.code = code
        raw = self.g0() if on else self.orwgetof0()
        if code == bytes:
            return raw
        return raw.decode(code) if code else vcs(raw)

    def wget(self, *aw):
        """Alias of gtoff() that always uses the offline cache."""
        return self.gtoff(*aw, on=False)

    def orwget(self, url, post=0, off=1):  # legacy
        """Legacy fetcher: return the decoded body of *url*.

        post: optional sequence of ``(key, value)`` pairs sent as POST body.
        off:  when true, read from / write to the offline cache.
        """
        self.offfile = self._cache_path(url, self.code)
        url = self._with_scheme(url)
        if off and mw.ifexist(self.offfile):
            byte = mw.rfdb(self.offfile)
        else:
            # Without POST fields the request must carry no body at all.
            data = self.mkpost(post).encode("utf8") if post else None
            byte = self._download(url, webrap.headers, data)
            if off:
                wfdb(self.offfile, byte)
        return byte.decode(self._text_encoding(self.code))

    def mkpost(self, q):
        """Encode the ``(key, value)`` pairs in *q* as ``k=v&k=v``.

        Every item is converted with ``str`` and percent-encoded using
        ``self.code``.
        """
        encoding = self._text_encoding(self.code)
        return "&".join(
            "=".join(quote_plus(str(w), encoding=encoding) for w in f)
            for f in q
        )

    # NOTE(review): the original pattern reached us as ``(?P.*?)`` (its markup
    # was stripped) and no longer compiled; this href matcher is a
    # reconstruction - confirm against the intended link format.
    irle = re.compile(r'<a\s[^>]*?href="(?P<url>[^"]*)"')

    def le(self, te):
        """Link extractor: store and return all matches of ``irle`` in *te*."""
        self.links = webrap.irle.findall(te)
        return self.links

    def nx(self, reg, te, url, page=1):
        """Pager: generate the text of consecutive result pages.

        reg:  regex locating the "next page" link in a page.
        te:   text of an already fetched first page (yielded first), or a
              falsy value to start by fetching.
        url:  with an ``int`` *page*, a format string receiving the page
              number; otherwise the address the next-page query (group 1 of
              *reg*) is combined with.
        page: first page number, or a non-int to follow the matched links.

        Iteration stops as soon as a page no longer matches *reg*.
        """
        irnx = re.compile(reg)

        def fpnxurl(tefp):
            mo = irnx.search(tefp)
            if not mo:
                print("not match regnx")
                return None
            if type(page) is not int:
                # Links inside HTML carry '&' as the entity '&amp;'.
                nxquery = mo.group(1).replace("&amp;", "&")
                # NOTE(review): jw() is called with these arguments exactly
                # as in the original; its separator semantics are not
                # visible here.
                nxurl = jw("------", url, nxquery)
                print(nxquery)
                return nxurl
            return True

        nxurl = url
        if te:
            yield te
            if type(page) is int:
                page += 1
            nxurl = fpnxurl(te)
            if not nxurl:
                # No next link on the supplied page: nothing left to fetch.
                return
        for n in range(page or 0, 987987987):
            if page:
                nxurl = url.format(n)
            te = self.gtoff(nxurl, self.code, on=1)
            yield te
            if not (nxurl := fpnxurl(te)):
                break

    def firefox(self, url):
        """Open *url* in the system's default web browser."""
        import webbrowser
        webbrowser.open(url)

    def ggle(self, q):
        """Search google.co.jp for *q* and return the result page text."""
        self.code = "shift-jis"
        post = (("q", q), ("hl", "ja"))  # Japanese interface
        return self.orwget("http://www.google.co.jp/search?" + self.mkpost(post))

    # NOTE(review): stub kept from the original; ``lwt`` is not defined in
    # this file, so calling this method fails unless ``mw`` provides it.
    def chr(self, te): lwt

    @staticmethod
    def htmlentity2unicode(text):
        """Turn HTML entity / character references in *text* into characters.

        Handles named (``&amp;``), decimal (``&#169;``) and hexadecimal
        (``&#xA9;``) references; unknown references are left untouched.
        """
        import html  # local: keeps the module namespace unchanged
        return html.unescape(text)

    def yaho(self, q):
        """Search yahoo.co.jp for *q* and return the result page text."""
        self.code = "utf-8"
        # n = results per page (10|15|20|30|40|100)
        post = (("ei", "UTF-8"), ("p", q), ("n", "40"))
        return self.orwget("http://search.yahoo.co.jp/search?" + self.mkpost(post))

    def bing(self, url):
        """Fetch *url* with browser-like headers through the offline cache.

        Returns the body decoded as UTF-8 (``self.code`` is set accordingly).
        """
        self.code = code = "utf-8"
        self.offfile = self._cache_path(url, code)
        url = self._with_scheme(url)
        if mw.ifexist(self.offfile):
            byte = mw.rfdb(self.offfile)
        else:
            # urllib rejects non-ASCII URLs; escape them but keep the URL
            # structure (and any existing %-escapes) intact.
            request_url = quote(url, safe=":/?&=%", encoding=code)
            byte = self._download(request_url, self._browser_headers)
            wfdb(self.offfile, byte)
        return byte.decode(code)

    def quotesafe(self, url):
        """Debug aid: dump *url* and its quoted form to r:\\url1 / r:\\url2."""
        qurl = quote_plus(url, encoding=self._text_encoding(self.code), safe=":/&?")
        write8(r"r:\url1", url)
        write8(r"r:\url2", qurl)


# ftp
class owf:
    """Minimal FTP client wrapper; the connection is kept in ``self.imf``."""

    def login(self, url, aka, pwd):
        """Connect to host *url* and log in as *aka* with password *pwd*."""
        self.imf = FTP(url)
        self.imf.login(aka, pwd)

    def list(self):
        """Print the ``LIST`` output and return the server's response line."""
        return self.imf.retrlines("LIST")

    def cbO(self, blockData):
        """``retrbinary`` callback: append one received block to ``self.res``."""
        self.res += blockData

    def o(self, fn):
        """Download remote file *fn* and return its content as bytes."""
        self.res = b""
        self.imf.retrbinary('RETR ' + fn, self.cbO)
        return self.res

    def uf(self, fn):
        """Upload local file *fn* under its base name; return the response."""
        with open(fn, "rb") as fo:
            return self.imf.storbinary("STOR " + os.path.split(fn)[1], fo)

    def u(self, fn, fo):
        """Upload file object *fo* as remote name *fn*; return the response."""
        return self.imf.storbinary("STOR " + fn, fo)

    def dele(self):
        """Close the FTP session."""
        self.imf.quit()


# ------------
defaultPort = 4000


class orListen(Thread):
    """TCP listener thread built on ``select()``.

    Received data is handed to ``self._cb(data, self.fil)``.  Sending the
    literal message ``owari`` (or setting ``self.owari``) stops the loop.
    """

    def __init__(self, cb=None, fil=(), ercb=(), **kw):
        """
        cb:   callback ``cb(data, fil)`` for received bytes; subclasses may
              define ``_cb`` instead.
        fil:  opaque value passed through to the callback.
        ercb: ``(callback, args)`` pair for errors; defaults to ``_ercb``.
        kw:   forwarded to ``Thread.__init__``.
        """
        super().__init__(**kw)
        self.owari = False
        if cb:
            self._cb = cb
        self.fil = fil
        self.ercb = ercb or (self._ercb, [])
        # main() needs both values even when i4l() was never called.
        self.host = '127.0.0.1'
        self.port = defaultPort

    def i4l(self, port, cb=None, fil=(), ercb=(), **kw):
        """Re-initialise the listener for *port* and return ``self``."""
        super().__init__(**kw)
        self.owari = False
        if cb:
            self._cb = cb
        if getattr(self, "_cb", None) is None:
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError("reimmpriment cb")
        self.fil = fil
        self.ercb = ercb or (self._ercb, [])
        self.host = '127.0.0.1'
        self.port = port
        return self

    def _ercb(self, erte, fil):
        """Default error callback: print the error."""
        print("---", erte)

    def main(self):
        """Serve connections until ``self.owari`` becomes true."""
        backlog = 10
        bufsize = 4096
        server_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.server_sock = server_sock
        socks = {server_sock}
        try:
            server_sock.bind((self.host, self.port))
            server_sock.listen(backlog)
            while True:
                # The timeout lets the loop notice ``owari`` without traffic.
                rready, _wready, _xready = select.select(socks, [], [], 5.0)
                if self.owari:
                    print("owari select")
                    break
                for sock in rready:
                    try:
                        if sock is server_sock:
                            conn, _address = server_sock.accept()
                            socks.add(conn)  # peer
                            continue
                        te = sock.recv(bufsize)
                        if not te:
                            # Peer closed the connection.  Leaving the
                            # socket registered would make select() report
                            # it as readable forever (busy loop).
                            socks.discard(sock)
                            shuttown(sock)
                        elif te == b"owari":
                            sock.send(b"len-0;")
                            socks.discard(sock)
                            self.owari = 1
                            shuttown(sock)
                        else:
                            self.sock = sock
                            self._cb(te, self.fil)
                    except Exception:
                        except2(edit=1)
                        # discard(): the socket may already have been removed.
                        socks.discard(sock)
                        shuttown(sock)
        except Exception as e:
            print(e)
            except2(edit=1)
        finally:
            for sock in socks:
                shuttown(sock)

    def run(self):
        """Thread entry point."""
        self.main()

    def chkbind(self):
        """Return 1 if ``(host, port)`` can be bound right now, else 0."""
        probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            probe.bind((self.host, self.port))
        except Exception:
            return 0
        else:
            return 1
        finally:
            shuttown(probe)

    def Release(self):
        """Ask a running listener on this port to stop.

        Does nothing when the port is free (no listener is running).
        """
        if self.chkbind():
            return
        # The message wakes up the blocking select() in main().
        send1(self.host, port=self.port, te="owari")
        self.owari = True


class _orListen(orListen):
    """Listener that executes every received message as Python code."""

    def _cb(self, te, fil):
        """Decode and print the message; ``x`` additionally stops the server."""
        te = te.decode()
        print(te)
        if te == "x":
            self.Release()
        self.cb2(te, fil)

    def cb2(self, te, fil):
        """Execute *te* and send the resulting value of ``r`` to the peer.

        The code runs with this module's globals and a local namespace
        containing ``sl``, ``te``, ``fil``, ``pn`` (peer name), ``so`` (the
        socket) and ``r`` (initially ``"r:"``).
        """
        pn = self.sock.getpeername()
        so = self.sock
        lcl = {"sl": self, "te": te, "fil": fil, "pn": pn, "so": so, "r": "r:"}
        try:
            # SECURITY NOTE(review): this executes arbitrary code received
            # from the socket.  The listener binds to 127.0.0.1 only; never
            # expose it on a reachable interface.
            exec(te, globals(), lcl)
        except Exception:
            print("exec:", traceback.format_exc())
        # str(): the executed code may have bound ``r`` to a non-string.
        self.sock.send(str(lcl["r"]).encode())

    def _ercb(self, erte, fil):
        """Error callback: print the error."""
        print("---", erte)


def chkbind(port):
    """Return 1 if *port* on 127.0.0.1 can be bound, else 0."""
    return _orListen().i4l(port).chkbind()


def send1(host, port, te):
    """Connect to ``(host, port)``, send *te* as UTF-8 and disconnect."""
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with closing(sock):
        sock.connect((host, port))
        sock.sendall(te.encode("utf8"))


def mainClient(fil=(0,), sp=(), iLi=(), host='127.0.0.1', port=defaultPort,
               bufsize=4096, timeout=2.0):
    """Client for the listener.

    fil:     one-element sequence ``(once,)``; a truthy ``once`` sends the
             numbers ``0 .. once-1``, a falsy one starts an interactive loop
             reading lines from stdin (an empty line ends it).
    sp:      unused, kept for backward compatibility.
    iLi:     messages to send; when given, the collected reply chunks are
             returned as a list and the other modes are skipped.
    timeout: seconds to wait for further reply data in ``iLi`` mode.

    Returns the list of reply chunks in ``iLi`` mode, otherwise ``None``.
    """
    let = []
    once, = fil
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with closing(sock):
        sock.connect((host, port))
        if iLi:
            for te in iLi:
                try:
                    sock.sendall(te.encode('utf-8'))
                    sock.settimeout(timeout)
                    for _ in range(99):
                        chunk = sock.recv(bufsize)
                        if not chunk:
                            break  # peer closed the connection
                        let.append(chunk)
                except socket.timeout:
                    # Expected: the reply is complete once no more data
                    # arrives within *timeout*.
                    pass
                except Exception:
                    print(traceback.format_exc())
            return let
        if once:
            for n in range(once):
                try:
                    sock.sendall(str(n).encode('utf-8'))
                    print(" send;", sock.recv(bufsize))
                except Exception:
                    print(traceback.format_exc())
        while not once:
            line = sys.stdin.readline().rstrip()
            if len(line) == 0:
                print("send x to quit.")
                break
            sock.sendall(line.encode('utf-8'))
            print(" client:", sock.recv(bufsize))
    return


send = mainClient

if __name__ == '__main__':
    import time
    from io import BytesIO
    from subprocess import Popen

    hh, fl = hhfl([])
    hh = hhww()
    if hh.getaddrinfo:
        print(socket.getaddrinfo("localhost", 4000))
        print(socket.getaddrinfo("localhost", 8080))

    post = [(1, 1), (2, 2), (3, 3), ("q", "検索")]
    q = urlencode(post)
    print(q)
    im = webrap()
    im.code = "cp932"

    if 0:  # manual network smoke tests, enable by hand
        webrap().ggle("かんこれ")
        rn = webrap().yaho("そのだうみ")
        rn = webrap.orwgetoff("http://search.yahoo.co.jp/search?")
        rn = webrap().bing("http://www.bing.com/search?q=かんこれ&qs=n&form=QBLH&filt=all&pq=かんこれ&sc=8-4&sp=-1&sk=")
        rn = webrap.orwgetoff("http://livedoor-search.naver.jp/search?c=ld_blog_sb&o_st=livedoor&sm=ldb_top&q=かよちん&search_btn=検索")
        webrap().bing(r"http://livedoor-search.naver.jp/search?c=ld_blog_sb&o_st=livedoor&sm=ldb_top&q=かよちん&search_btn=検索")
        # getoff
        im = webrap()
        im.url = r"http://blog.livedoor.jp/yuriss/"
        te = im.orwgetoff(im.url)
        wfdb(r"r:\get.txt", te)

    if {"ftp"}.issubset(hh):
        imf = owf()
        # NOTE(review): credentials are hard-coded in the source; move them
        # to configuration / environment and rotate the password.
        imf.login("red.ribbon.to", "shinuno", "forget")
        if {"ftp", "ftp1"}.issubset(hh):
            # retrlines() prints every listing line to stdout.
            li = imf.list()
            print("----", li)
            li = imf.imf.retrlines("NLST")
            print("----", li)
            li = imf.imf.retrlines("MLSD")
            print("----", li)

        def mkBody(te):
            """Return the page text for the uploaded index file."""
            # NOTE(review): the HTML markup of this template appears to have
            # been stripped from the source; only the text survived.
            t2 = """
Index of /video
%s
""" % (te)
            return t2

        if {"ftp", "ftp2"}.issubset(hh):
            imf.imf.cwd("/public_html/video")
            print(imf.imf.pwd())
            print(imf.imf.nlst())
            t = time.localtime()
            # day_hourminute_second; the previous format string had lost its
            # conversion specifiers and raised TypeError.
            t2 = "%02d_%02d%02d_%02d" % (t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec)
            fn = "|" + t2 + ".txt"
            fnh = "|" + t2 + ".htm"
            ri = imf.u(fn, BytesIO(b"fwfwf"))
            ri = imf.u(fnh, BytesIO(mkBody(time.strftime("%Y_%m/%d %H%M %A %B %c")).encode("utf8")))
        if {"ftp", "ftp3"}.issubset(hh):
            imf.imf.cwd("/public_html/cgi")
            fn = r"N:\afilen\program\html\award\cgi\wget.php"
            ri = imf.uf(fn)
        imf.dele()

    import jk987

    iml = _orListen()
    if "client" == arps.f:
        try:
            mainClient([2])
        except ConnectionRefusedError as er:
            print(" connectEr: ", str(er))
        except Exception:
            print(traceback.format_exc())
        input("owari\n")
    elif "listen" in hh:
        iml.start()
        # Spawn client processes of this same script (Windows ``start``).
        for _ in range(4):
            Popen(["start", sys.argv[0], "-f", "client"], shell=True)
        for f in range(2):
            print("listen:", f)
            time.sleep(2.0)
        iml.owari = True

    if "clien2" == arps.f:
        try:
            mainClient([None])
        except Exception:
            print(traceback.format_exc())
        input("owari\n")
    elif "ser2" in hh:
        for i in range(2):
            Popen(["start", sys.argv[0], "-f", "clien2"], shell=True)
        iml.main()
        for i in range(2):
            print("listen:", i)
            time.sleep(2.0)
        iml.owari = True