First, the source code:
import os
import random
import re
import time

import requests

search = input("search syntax:")
page1 = int(input("start-page:"))
page2 = int(input("end-page:"))
path = input("path:")

try:
    for i in range(page1, page2 + 1):
        print("page:" + str(i))
        derpy = "derpicdn.net"
        search_url = "https://trixiebooru.org/search?sd=desc&sf=score&page=" + str(i) + "&q=" + search
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/117.0",
            "cookie": "filter_id=100073",
        }
        results1 = requests.get(url=search_url, headers=headers)

        # Grab every https:// URL on the search results page.
        pattern1 = re.compile(r'https://[^\s<>"]+[^\s<>"]')
        s1 = pattern1.findall(results1.text)

        targets1 = []  # thumbnail URLs on derpicdn.net
        ids = []       # image IDs
        date_ids = []  # the /YYYY/M/D/id part of each CDN path
        targets2 = []  # final full-size image URLs
        types = []     # file extensions (gif/png/jpg)

        # Keep only the derpicdn.net links, skipping CSS/JS leftovers.
        for link in s1:
            if derpy in link and link[-2:] != ";}" and link[-2:] != ";]":
                print(link)
                targets1.append(link)
                types.append(link[-3:])

        # Extract the date/ID path segment from each thumbnail URL.
        for target in targets1:
            match1 = re.search(r"img(.*)/t", target)
            if match1:
                num = match1.group().replace("/t", "").replace("img", "")
                date_ids.append(num)
                match2 = re.search(r"\/([^\/]+)$", num)
                ids.append(match2.group(1))

        # Visit each image page and recover the full-size file name.
        original_url = "https://trixiebooru.org/images/"
        for j in range(len(ids)):
            url1 = original_url + ids[j]
            key = ids[j] + "__"
            results2 = requests.get(url=url1, headers=headers)
            ext = types[j]
            if ext in ("gif", "png", "jpg"):
                pattern2 = re.compile(key + r"(.*?)\." + ext)
                s2 = pattern2.findall(results2.text)
                if len(s2) == 2:
                    s = s2[0] + "." + ext
                    target_url = "https://derpicdn.net/img/view" + date_ids[j] + s
                    targets2.append(target_url)
                    print("Fetching target " + str(j + 1) + "...")

        # Download every full-size image under a random file name.
        for target in targets2:
            print("target:" + target)
            print("downloading...")
            n1 = random.randint(10000, 99999)
            n2 = random.randint(10000, 99999)
            data = requests.get(url=target, headers=headers)
            picture = str(n1) + str(n2) + "." + target[-3:]
            with open(os.path.join(path, picture), "wb") as t:
                t.write(data.content)
            print("Done!")
            time.sleep(1)
except Exception as exc:
    print("download error!", exc)

#default:100073
#everything:56027
This script needs a local Python environment (plus the requests library): copy the source code into a file and change the extension to .py.
The script will prompt for a Derpibooru search query, the start and end pages to grab, and the path where the images will be saved. Take artist:magnaluna as an example.
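A run might look like the following (the save path is just a made-up example; point it at any existing folder):

search syntax:artist:magnaluna
start-page:1
end-page:2
path:D:\ponies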
Screenshot of a run:
After a short wait you'll see the ponies in that folder!
# The script's filter defaults to Default; to switch it, edit the filter_id inside headers in the source code (IDs below, and see the snippet after the list)
100073: Default
56027: Everything
37431: Legacy Default
37429: 18+ Dark
37432: 18+ R34
37430: Maximum Spoilers
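For example, to switch to the Everything filter, the headers near the top of the script would become:

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/117.0",
    "cookie": "filter_id=56027",  # 56027 = Everything (the default is 100073)
}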
My skills are limited, so please bear with me!