I am trying to scan a list of websites to check whether each one is built on WordPress. However, I run into a problem when a site appears to be online but returns a 404 error.
I have to call exit() to quit the program; otherwise I get a "RemoteConnection closed" issue.
这是扫描网站的代码
import requests
# Request headers shared across the script; None until get() fills them in.
user_agent=None
# A browser-like User-Agent keeps the scanner from showing up as Python and
# getting blocked. It is kept in a global so the requests that need to follow
# redirects (made outside get()) can reuse the same headers.
def get(websiteToScan, timeout=None):
    """Fetch a URL without following redirects, posing as a desktop browser.

    As a side effect this (re)binds the module-level ``user_agent`` headers
    dict, which other requests in this file pass as ``headers=``.

    Args:
        websiteToScan: Absolute URL to request.
        timeout: Seconds handed to ``requests.get``. The default ``None``
            waits indefinitely, which is what this function did before the
            parameter existed; pass a number to stop a stalled server from
            blocking the scan.

    Returns:
        The ``requests.Response`` for the URL itself (redirects are not
        followed, so a 301/302 is returned as-is).

    Raises:
        requests.exceptions.RequestException: Whatever ``requests.get``
            raises (connection errors, timeouts, invalid URLs) propagates
            to the caller.
    """
    global user_agent
    user_agent = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
    }
    return requests.get(
        websiteToScan,
        allow_redirects=False,
        headers=user_agent,
        timeout=timeout,
    )
def findWpStatus(websiteToScan, timeout=10):
    """Report whether a site looks like WordPress by probing ``/wp-login.php``.

    The site is first requested without following redirects to see whether it
    is reachable, then requested again following redirects to report where it
    leads, and finally ``/wp-login.php`` is requested. Progress is printed
    along the way.

    A site that cannot be scanned is skipped by returning ``None``; the
    function never terminates the program, so a caller looping over many
    sites keeps going.

    Args:
        websiteToScan: Host name or URL. ``http://`` is assumed when no
            scheme is given; trailing slashes are ignored.
        timeout: Seconds to wait on the redirect and ``/wp-login.php``
            requests before giving up on the site.

    Returns:
        ``'Yes'`` if ``/wp-login.php`` answers with HTTP 200, ``'No'`` if it
        answers with any other status, and ``None`` if the site could not be
        scanned (offline, connection dropped, timed out, or the first
        request returned a status other than 200/301/302).
    """
    # Split off the scheme so the host part can be tidied on its own.
    if websiteToScan.startswith('http://'):
        proto = 'http://'
        websiteToScan = websiteToScan[7:]
    elif websiteToScan.startswith('https://'):
        proto = 'https://'
        websiteToScan = websiteToScan[8:]
    else:
        proto = 'http://'

    # Drop trailing slashes only; strip('/') would also eat leading ones.
    websiteToScan = proto + websiteToScan.rstrip('/')

    # Check to see if the site is online
    print ("Checking to see if the site is online...")
    try:
        onlineCheck = get(websiteToScan)
    except requests.exceptions.ConnectionError:
        print (f"{websiteToScan} appears to be offline.")
        return None
    except requests.exceptions.RequestException as ex:
        print (f"{websiteToScan} could not be checked ({ex}); skipping.")
        return None

    if onlineCheck.status_code not in (200, 301, 302):
        print (f"{websiteToScan} appears to be online but returned a {str(onlineCheck.status_code)} error.")
        # Skip this site instead of calling exit(), which would abort the
        # scan of every remaining site as well.
        return None

    print (f"{websiteToScan} appears to be online.")
    print ("Beginning scan...")
    print ("Checking to see if the site is redirecting...")
    redirectCheck = _fetch_or_none(websiteToScan, timeout)
    if redirectCheck is None:
        return None

    if len(redirectCheck.history) > 0:
        # history holds the intermediate responses; the first one tells us
        # how the originally requested URL answered.
        if redirectCheck.history[0].status_code in (301, 302):
            print ("[!] The site entered appears to be redirecting, please verify the destination site to ensure accurate results!")
            print (f"It appears the site is redirecting to {redirectCheck.url}")
    elif 'meta http-equiv="REFRESH"' in redirectCheck.text:
        print ("The site entered appears to be redirecting, please verify the destination site to ensure accurate results!")
    else:
        print ("Site does not appear to be redirecting...")

    print ("Attempting to get the HTTP headers...")

    ####################################################
    # WordPress Scans
    ####################################################
    # Use requests.get allowing redirects otherwise will always fail
    wpLoginCheck = _fetch_or_none(websiteToScan + '/wp-login.php', timeout)
    if wpLoginCheck is None:
        return None
    if wpLoginCheck.status_code == 200:
        return 'Yes'
    return 'No'


def _fetch_or_none(url, timeout):
    """GET *url* following redirects; print a notice and return None on any requests error."""
    try:
        return requests.get(url, headers=user_agent, timeout=timeout)
    except requests.exceptions.RequestException as ex:
        # Covers servers that close the socket without replying, timeouts,
        # redirect loops, and similar failures.
        print (f"{url} could not be fetched ({ex}); skipping.")
        return None
# Sites to probe, one URL per entry.
websites = [
    "http://www.autofi.com",
    "https://www.autograph.me",
    "http://autoidinc.com",
    "http://www.automatedinsights.com",
    "http://automatic.com",
    "https://automationhero.ai",
    "https://www.automile.com",
    "https://www.automizy.com",
    "https://www.automotivemastermind.com",
    "https://www.automox.com",
    "http://www.autonetmobile.com",
    "http://autonomic.ai",
    "http://www.autonomoushealthcare.com",
    "http://www.automarinesys.com",
    "http://www.autopilothq.com",
    "http://autoref.com",
    "http://www.autovirt.com",
    "https://www.autzu.com",
    "http://www.ava.me",
    "http://www.eatwithava.com",
]

# Scan every site in order; the value returned by findWpStatus is not used here.
for website in websites:
    findWpStatus(website)
我遇到了RemoteConnectionClosed问题,因此我不得不执行exit()退出整个程序。
我该如何处理此类问题?我创建了一个repl来观看演示
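The issue is with the site http://autoref.com. It appears to close the socket without sending any response at all. To get around this, wrap your requests calls in a try/except block (for example, `except requests.exceptions.RequestException`) and skip the site when the request fails, instead of calling exit().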