mirror of
https://github.com/ChronosX88/PyNesca.git
synced 2024-11-22 13:12:18 +00:00
8e1fc5a369
in config.
32 lines
1022 B
Python
32 lines
1022 B
Python
from core.prototypes import AbstractScanner
|
|
from urllib.parse import urlencode
|
|
import requests
|
|
import re
|
|
STATS_SEARCHPATTERN = r'<div id="resultStats>([^>]+)'
|
|
LINK_SEARCHPATTERN = r'<div class="r"><a href="([^"]+)"'
|
|
RESULT_REGEXP = re.compile(LINK_SEARCHPATTERN)
|
|
class GoogleSearcher(AbstractScanner):
|
|
def __init__(self:
|
|
pass
|
|
def scan_address(self, query:'google_search_query')->{"search_result_list"}:
|
|
search_url = "http://google.com/search?%s"
|
|
num_loaded_results = 100
|
|
start = 0
|
|
search_result_list = set()
|
|
while num_loaded_results == 100:
|
|
query_params = {
|
|
"num":100,
|
|
"q":query,
|
|
"start":start,
|
|
"filter":0
|
|
}
|
|
page = requests.get(search_url % urlencode(query_params))
|
|
if page.status_code != 200:
|
|
break
|
|
start += 100
|
|
result_page = set(RESULT_REGEXP.findall(page_text))
|
|
num_loaded_results = len(result_page)
|
|
|
|
|
|
|