medved/lib/plugin/iscan/tasks/http.py

61 lines
2.0 KiB
Python
Raw Permalink Normal View History

from lib.exec import Task
from io import BytesIO
import json
import time
import bs4
import requests
import urllib3
from PIL import Image
from bson.binary import Binary
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.proxy import Proxy, ProxyType
import zlib
import netaddr
class HTTPFindTask(Task):
def __init__(self, id, root):
super().__init__(id, root)
def _process(self, item):
urllib3.disable_warnings()
response = requests.get(url='http://%s:%s/' % (self._host['ip'], self._host['port']),
timeout=cnf.stalker.HTTP.timeout,
verify=False)
if response.status_code in [400, 401, 403, 500]:
raise self.PipelineError("Bad response")
self._host['data']['response'] = {}
self._host['data']['response']['code'] = response.status_code
self._host['data']['response']['text'] = response.text
self._host['data']['response']['content'] = response.content
self._host['data']['response']['encoding'] = response.encoding
self._host['data']['response']['headers'] = response.headers
encoding = response.encoding if 'charset' in response.headers.get('content-type', '').lower() else None
soup = bs4.BeautifulSoup(response.content, "html.parser", from_encoding=encoding)
if soup.original_encoding != 'utf-8':
meta = soup.select_one('meta[charset], meta[http-equiv="Content-Type"]')
if meta:
if 'charset' in meta.attrs:
meta['charset'] = 'utf-8'
else:
meta['content'] = 'text/html; charset=utf-8'
self._host['data']['response']['text'] = soup.prettify() # encodes to UTF-8 by default
title = soup.select_one('title')
if title:
if title.string:
title = title.string
else:
title = ""
else:
title = ""
self._host['data']['title'] = title