Thoughts on Asset Collection
In past engagements I found that many assets are not discovered through the usual channels such as ICP filing records, domain enumeration, or C-class (/24) ranges; a lot of them only show up as links or redirects on the homepage or other pages. So I spent a few hours writing this attack-surface plugin.
A Simple Plugin Design
The design is very simple; after all, it is a script that was thrown together quickly.
Crawl the given URL
Filter the extracted links
Maintain four result sets: url_subdomain, subdomain, url_ip, ip
If a link points to an IP
    add the link to url_ip and its host to ip
If a link points to a domain
    check whether the link contains the login, admin, sign, or auth keywords
    check whether it shares the same primary domain as the input URL
    check whether it has the same ICP filing as the input URL (the implementation depends on the filing data source)
    if any of these checks passes, add the link to url_subdomain and its host to subdomain (see the output sketch right after this list)
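To make the intended output concrete, here is a hypothetical sketch of the structure that get_site_info_result (defined below) is meant to return. The values are placeholders (example.com and the documentation IP 203.0.113.10), not real scan results:

result = get_site_info_result('https://example.com')
# result is a dict of four lists, for example:
# {
#     'url_subdomain': ['https://admin.example.com/login'],
#     'subdomain': ['admin.example.com'],
#     'url_ip': ['http://203.0.113.10:8080/'],
#     'ip': ['203.0.113.10']
# }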
Plugin Results
Plugin Source Code
There are probably still bugs; it is for my own use anyway, and I will fix it later.
websiteinfo.py
import requests
from bs4 import BeautifulSoup
import ipaddress
from urllib.parse import urlparse
import urllib3
import beian

# verify=False is used below; silence the InsecureRequestWarning it triggers
urllib3.disable_warnings()

def get_external_links(url):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0',
        'Referer': 'https://www.baidu.com/'
    }
    # Send the request and fetch the page content
    try:
        response = requests.get(url, headers=headers, verify=False, timeout=3)
        if response.status_code != 200:
            return []
    except Exception:
        return []
    # Parse the page content with BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')
    # Extract all links
    links = soup.find_all('a', href=True)
    # Keep only external links
    external_links = []
    for link in links:
        href = link['href']
        # Check whether it is an external link
        if is_external_link(href):
            external_links.append(href)
    return external_links

def is_external_link(href):
    # Ignore in-page anchors
    if href.startswith('#'):
        return False
    # Ignore javascript: links
    if href.startswith('javascript:'):
        return False
    # Only keep absolute http/https links
    if not href.startswith('http://') and not href.startswith('https://'):
        return False
    return True

def filter_external_links(extern_links, url):
    result = {
        'url_subdomain': set(),
        'subdomain': set(),
        'url_ip': set(),
        'ip': set(),
    }
    for link in extern_links:
        # Classify the link
        if is_ip_address(link):
            # IP address: keep the full URL and the bare IP
            result['url_ip'].add(link)
            result['ip'].add(urlparse(link).hostname)
        else:
            # Domain
            domain = urlparse(link).hostname
            if not domain:
                continue
            # Check whether the link contains the login, admin, sign or auth keywords
            if 'login' in link.lower() or 'admin' in link.lower() or 'sign' in link.lower() or 'auth' in link.lower():
                result['url_subdomain'].add(link)
                result['subdomain'].add(domain)
            # Check whether it shares the same primary domain as the input URL
            if is_same_subdomain(domain, urlparse(url).hostname):
                result['url_subdomain'].add(link)
                result['subdomain'].add(domain)
            else:
                # Check whether it has the same ICP filing (uses the lookup in beian.py)
                if is_recorded(domain, urlparse(url).hostname):
                    result['url_subdomain'].add(link)
                    result['subdomain'].add(domain)
    return {key: list(value) for key, value in result.items()}

def is_ip_address(link):
    try:
        parse_link = urlparse(link)
        ipaddress.ip_address(parse_link.hostname)
        return True
    except ValueError:
        return False

def is_same_subdomain(domain1, domain2):
    parts1 = domain1.split('.')
    parts2 = domain2.split('.')
    # Both need at least two labels
    if len(parts1) < 2 or len(parts2) < 2:
        return False
    # Compare the registered domain (last two labels); a rough heuristic without a public suffix list
    return parts1[-2:] == parts2[-2:]

def is_recorded(domain1, domain2):
    # domain type: test.com
    domain_part1 = domain1.split('.')
    domain_part2 = domain2.split('.')
    # Both need at least two labels
    if len(domain_part1) < 2 or len(domain_part2) < 2:
        return False
    try:
        # Query the ICP filing for each registered domain and compare the filing company names
        main1 = '.'.join(domain_part1[-2:])
        main2 = '.'.join(domain_part2[-2:])
        print(main1, main2)
        return beian.icp_search(main1)['Company_Name'] == beian.icp_search(main2)['Company_Name']
    except Exception:
        return False

def get_site_info_result(url):
    return filter_external_links(get_external_links(url), url)


if __name__ == '__main__':
    url = 'https://www.hnyjj.org.cn'
    external_links = get_external_links(url)
    result = filter_external_links(external_links, url)
    print(result)
beian.py
import time
import requests
from bs4 import BeautifulSoup
import warnings
warnings.filterwarnings("ignore")
def extract_data(html):
    # Parse the icplishi.com result page and pull out the ICP filing fields
    result = {}
    soup = BeautifulSoup(html, 'html.parser')
    icp_type = soup.select_one('td.th:contains(备案类型) + td span').text
    company = soup.select_one('td.th:contains(备案主体) + td a').text
    number = soup.select_one('td.th:contains(备案号) + td a').text
    start = soup.select_one('td.th:contains(备案时间) + td span').text
    end = soup.select_one('td.th:contains(备案时间) + td span').next_sibling.text
    verify_time = start + "-" + end
    result["typ"] = icp_type
    result["comName"] = company
    result["license"] = number
    result["verifyTime"] = verify_time
    return result

def icp_search0(domain):
    # Look up the ICP filing on icplishi.com
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63",
            "Content-Type": "application/json"
        }
        res = requests.get("https://icplishi.com/" + domain + "/", headers=headers, verify=False).text
        if "备案主体" not in res:
            return None
        info = extract_data(res)
        formatted_data = {"ICP_Type": info["typ"].strip(), "Company_Name": info["comName"].strip(),
                          "ICP_Number": info["license"].strip(), "Verify_Time": info["verifyTime"].strip()}
        return formatted_data
    except Exception:
        return None

def icp_search1(domain):
    # Fallback lookup on aizhan.com
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63",
            "Content-Type": "application/json"
        }
        res = requests.get("https://www.aizhan.com/cha/" + domain + "/", headers=headers, verify=False).text
        if "备案信息" not in res:
            return None
        soup = BeautifulSoup(res, 'html.parser')
        icp_number = soup.find("a", id="icp_icp").text
        icp_type = soup.find("span", id="icp_type").text
        icp_company = soup.find("span", id="icp_company").text
        icp_passtime = soup.find("span", id="icp_passtime").text
        formatted_data = {"ICP_Type": icp_type.strip(), "Company_Name": icp_company.strip(),
                          "ICP_Number": icp_number.strip(), "Verify_Time": icp_passtime.strip()}
        # Sleep a bit to avoid being blocked for requesting too quickly
        time.sleep(2)
        return formatted_data
    except Exception:
        return None

def icp_search(domain):
    # Try icplishi.com first, then fall back to aizhan.com (avoids querying icplishi twice)
    result = icp_search0(domain)
    if result:
        return result
    return icp_search1(domain)
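
For reference, the filing lookup can also be called on its own. A minimal sketch, assuming beian.py sits next to the calling script and using example.com as a placeholder domain; the dict keys come from the code above, and None means neither source returned a record:

import beian

info = beian.icp_search("example.com")  # placeholder domain
if info:
    # Keys produced above: ICP_Type, Company_Name, ICP_Number, Verify_Time
    print(info["Company_Name"], info["ICP_Number"])
else:
    print("no ICP record found")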