#!/bin/bash
#
# bing-ip2hosts - Enumerate hostnames from Bing.com for an IP address.
# Bing.com is Microsoft's search engine which has an IP: search parameter.
#
# Copyright (C) 2009 - 2020 Andrew Horton aka urbanadventurer
# Homepage: http://www.morningstarsecurity.com/research/bing-ip2hosts
#
# License: GPLv3
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

VERSION=1.0.5

# Directory for temporary files. Honours $TMPDIR; overridden by the -t option.
TMP_DIR="${TMPDIR:-/tmp}"
QUIET=0               # 1 = print only the final results (-q)
OUTPUT_IP=0           # 1 = prefix every result line with "IP," (-c)
DISPLAY_URL_PREFIX=1  # 0 = print bare hostnames without the URL prefix (-u)
IP=
PREFIX=

# Temporary files are deleted only when DEBUG is 0. The default used to be a
# hard-coded 1, which left every scraped page and results file behind in
# TMP_DIR. Set BING_IP2HOSTS_DEBUG=1 in the environment to keep them.
DEBUG="${BING_IP2HOSTS_DEBUG:-0}"

STOP_AFTER_PAGES=5    # stop after this many pages without new results (-n)
BING_SETLANG=en-us    # value of the setlang URL parameter (-l)
BING_SETMKT=          # value of the setmkt URL parameter (-m)
UNAME=$(uname)        # compared against "Darwin" for macOS-specific handling
USER_AGENT="Wget/1.20 (linux-gnu)" # Bing responds differently to wget version 1.20+

# ANSI escape sequences; they are rendered by the `echo -e` calls that use them.
N="\033[0;0m"  #NORMAL
W="\033[0;97m" #WHITE
R="\033[0;31m" #RED
DR="\033[1;31m" #DARKRED
G="\033[0;32m" #GREEN
Y="\033[1;33m" #YELLOW
B="\033[0;34m" #BLUE
DG="\033[1;30m" #DARKGRAY
REVERSED="\033[1;7m" #REVERSED

trap ctrl_c INT

# SIGINT handler: remove the page currently being scraped (unless debugging),
# print whatever hosts have been collected so far, and exit with status 1.
# Globals read: DEBUG, f_scraped_html, f_results
function ctrl_c() {
  echo -e "\n* Stopping after user pressed CTRL-C."

  # clean up tmp files
  if [[ $DEBUG == 0 ]] && [[ -n "$f_scraped_html" ]]; then
    rm -f "$f_scraped_html"
  fi

  # CTRL-C can arrive before any scrape has created a results file (for
  # example at the usage prompt); there is nothing to report in that case.
  if [[ -n "$f_results" ]] && [[ -r "$f_results" ]]; then
    show_hosts
  fi
  exit 1
}

# Verify that wget is available.
# Globals read: UNAME
# Outputs: installation advice on stderr when wget is missing
# Exits:   with status 1 when wget cannot be found
function dependency_check() {

  # `command -v` is a shell builtin, so the check itself does not rely on
  # the external `which` utility being installed.
  if ! command -v wget >/dev/null 2>&1; then
    echo "This script requires wget, please resolve and try again." >&2
    if [[ $UNAME == "Darwin" ]]; then
      echo "Try: brew install wget" >&2
    fi
    exit 1
  fi

}

# Print the ASCII-art logo followed by the version and project links.
# The art is stored in the global BANNER variable and printed with `echo -e`
# so that the colour escape sequences held in $G, $N and $W are rendered.
# Quotes and backticks inside the art are backslash-escaped because the whole
# banner is a single double-quoted string.
function banner() {
BANNER="
$G  m,                $N   .,recon:,        ,,
$G  #####             $N  ]##\"\"^^\"%##m    %##b
$G  ####b             $N  ]##      \`##b
$G  ####b             $N  ]##       ##    i##    @#b,######m       ,######m ##b
$G  ####b 1mw,        $N  ]##MMM####      i##    ]###\`    %##     ###\`    \`@##
$G  ####b  1#####Nw,  $N  ]##\`\`    @#b    i##    ]##       ###   ###       j##
$G  ####i   %########[$N  ]##       @##   i##    ]##       ###   ##g       j##
$G  ####n      2#####[$N  ]##      @##    i##    ]##       ###   @##       {##
$G  ####g  ,#########b$N  ]##   ,,e###    j##    ]##       ###    7##m,,,s#M##
$G  #############M^   $N  'WWWWWW%b^       ii    'nn       nn*      \`1337\` g##
$G  ##########\"      $N                                                    G##
$G    \"%##\"         $N                                            @#Gmmem###G
  ,i             $G   ,s2e,    $N ##                                  \`\`\`\`
   \`            $G   \"\`   %#$N    ##                             T#
  ]#   ]#,#M5@#p $G        #b  $N #H#H%@#     s#M5O#o   ,#MSSM  W@##W=  s#SSW
  ]#   j#p    ^#p$G      ,#M   $N ##    @#   ##'   'O#  S#,      ]#     #b
  ]#   j#      #M$G    ,#M     $N ##    @#   #o     O#    \"SXm   ]#      ^\"@#
  ]#   j##,  ,## $G  ,#2       $N ##    @#   7#.   .#O  ,   ]#   ]#Q       ,#s
  ]#   j######'  $G  #######x  $N ##    @#    s#####o    ####^    #Tt    ####^
       j#
$N       j#         $W bing-ip2hosts ($VERSION) by Andrew Horton @urbanadventurer
$N       j#         $W https://morningstarsecurity.com/research/bing-ip2hosts
$N                  $W https://github.com/urbanadventurer/bing-ip2hosts$N"

  echo -e "$BANNER"
}


# Redraw the full-screen status panel for the scrape in progress.
# Globals read: urlpath, vhosts, paginated_links, paginated_current_link, IP,
#   uniq_hosts, page, page_title, results_count, new_results,
#   extra_status_update, VERSION and the colour variables.
# Outputs: clears the terminal, then writes the panel, the hosts found on the
#   current page (as many as fit) and a one-character progress indicator.
function display_progress() {
  local tput_cols tput_lines maxwidth_url max_vhosts
  local urlpath_truncated="" vhosts_truncated=""
  local highlight_paginated_links PROGRESS progress_animation x

  # tput prints nothing when the terminal type is unknown; fall back to the
  # classic 80x24 so the arithmetic below never sees an empty value.
  tput_cols=$(tput cols 2>/dev/null)
  tput_lines=$(tput lines 2>/dev/null)
  [[ "$tput_cols" =~ ^[0-9]+$ ]] || tput_cols=80
  [[ "$tput_lines" =~ ^[0-9]+$ ]] || tput_lines=24

  # Leave room for the "[ ] /" prefix. A negative width would make the
  # substring expansion below cut characters off the END of the URL instead.
  maxwidth_url=$((tput_cols - 6))
  (( maxwidth_url >= 1 )) || maxwidth_url=1

  if [[ ${#urlpath} -gt $maxwidth_url ]]; then
    urlpath_truncated="${urlpath:0:$maxwidth_url}"
  fi

  # The panel occupies the top of the screen; show only the hosts that fit in
  # the remaining rows, and none at all on very short terminals.
  max_vhosts=$(( tput_lines - 15 ))
  if (( max_vhosts > 0 )); then
    vhosts_truncated=$(echo "$vhosts" | head -n "$max_vhosts")
  fi

  # Highlight the current page number inside the list of pagination links.
  # The pattern is quoted so it is matched literally, not as a glob.
  highlight_paginated_links=$paginated_links
  if [[ -n "$paginated_current_link" ]]; then
    highlight_paginated_links=${paginated_links/"$paginated_current_link"/${G}${REVERSED}${paginated_current_link}${N}}
  fi

  PROGRESS="$G  m,           $DG -----.--[$N bing-ip2hosts v$VERSION $DG]-------------------------
$G  #####             $DG | $N Searching    : $IP
$G  ####b             $DG | $N$Y Found        : $uniq_hosts 
$G  ####b             $DG | $N Scraped pages: $page  
$G  ####b 1mw,        $DG | $N 
$G  ####b  1#####Nw,  $DG | $N Page Title   : $page_title
$G  ####i   %########[$DG | $N Results      : $results_count
$G  ####n      2#####[$DG | $N Pagination   : $highlight_paginated_links
$G  ####g  ,#########b$DG | $N New          : $new_results new
$G  #############M^   $DG | $N   
$G  ##########\"       $DG | $N $extra_status_update
$G    \"%##\"           $DG | $N CTRL-C to stop

${N}[ ] /${urlpath_truncated:-$urlpath}"

  #progress_animation="⣾⣽⣻⢿⡿⣟⣯⣷"
  progress_animation="▏▎▍▌▋▊▉█▊▋▌▍▎"

  clear
  echo -e "$PROGRESS"
  echo -e "${B}$vhosts_truncated${N}"

  # Pick the animation frame from the page number, then draw it inside the
  # "[ ]" on the URL line (row 13, column 1) and park the cursor at row 24.
  x=$(( page % ${#progress_animation} ))
  tput cup 13 1
  echo -e "$DR${progress_animation:$x:1}"
  tput cup 24 0
}

# Print the de-duplicated hosts collected in $f_results, one per line.
# Globals read: f_results, DISPLAY_URL_PREFIX, OUTPUT_IP, IP, OUTPUT_FILE,
#   DEBUG, QUIET, page and the colour variables
# Globals set:  count_uniq_hosts (number of hosts printed) and PREFIX (set to
#   "IP," when OUTPUT_IP is 1)
# Outputs: the host list on stdout, also appended to $OUTPUT_FILE when set,
#   followed by a summary line unless QUIET is 1.
function show_hosts() {
  local suffix=""
  local sorted_hosts=""
  local h
  local -a output_lines=()

  if [[ -n "$f_results" ]] && [[ -r "$f_results" ]]; then
    # Blank lines are dropped first so they can never be counted as a host.
    # sort by hostname. ignore URL prefix
    if [[ $DISPLAY_URL_PREFIX == 1 ]]; then
      sorted_hosts=$(grep -v '^[[:space:]]*$' "$f_results" | sort -u -t '/' -k 3)
    else
      sorted_hosts=$(grep -v '^[[:space:]]*$' "$f_results" | sort -u)
    fi
  fi

  if [[ $DEBUG == 0 ]] && [[ -n "$f_results" ]]; then
    rm -f "$f_results"
  fi

  if [[ $OUTPUT_IP == 1 ]]; then
    PREFIX="$IP,"
  fi

  if [[ $DISPLAY_URL_PREFIX == 1 ]]; then
    suffix="/"
  fi

  # Build the output once so the screen, the output file and the reported
  # count always agree. Reading line by line avoids glob expansion of hosts.
  while IFS= read -r h; do
    [[ -n "$h" ]] || continue
    output_lines+=("$PREFIX$h$suffix")
  done <<< "$sorted_hosts"

  count_uniq_hosts=${#output_lines[@]}

  echo -en "$N"

  if (( count_uniq_hosts > 0 )); then
    printf '%s\n' "${output_lines[@]}"

    if [[ -n "$OUTPUT_FILE" ]]; then
      printf '%s\n' "${output_lines[@]}" >> "$OUTPUT_FILE"
    fi
  fi

  if [[ $QUIET == 0 ]]; then
    echo -e "\n${G}✓${N} Found ${Y}$count_uniq_hosts ${N}results after scraping ${Y}$page ${N}pages."
    echo
  fi
}

# Turn a target into an IPv4 address.
# Arguments:   $1 - dotted-quad IPv4 address, or a hostname to resolve
# Globals set: ret - the IPv4 address, or empty when the target is invalid
#              or cannot be resolved
# Returns:     0 when ret holds an address, 1 otherwise (message on stderr)
function validate_target(){
  local ipv4_re='^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$'
  local octet
  ret=""

  # if the parameter looks like an IP go ahead, otherwise resolve it
  if [[ "$1" =~ $ipv4_re ]]; then
    ret="$1"
    for octet in "${BASH_REMATCH[@]:1}"; do
      # 10# forces base 10 so an octet such as "08" is not parsed as octal
      if (( 10#$octet > 255 )); then
        ret=""
        break
      fi
    done
  elif [[ -n "$1" ]]; then
    # Answer lines look like "Address: 1.2.3.4"; the resolver's own
    # "Address:" line carries a "#port" suffix and is excluded by the anchor.
    ret=$(nslookup "$1" | grep -E "^Address: [0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$" | tail -1 | awk '{ print $2 }')
  fi

  if [[ -z "$ret" ]]; then
    echo "Cannot resolve $1 to an IP address." >&2
    return 1
  fi
}

# Scrape Bing's "ip:" search results for one target, page by page, and
# collect the hostnames found into a temporary results file.
# Arguments:    $1 - IP address or hostname to search for
# Globals read: TMP_DIR, QUIET, DEBUG, USER_AGENT, BING_SETLANG, BING_SETMKT,
#   DISPLAY_URL_PREFIX, stop_sequence, STOP_AFTER_PAGES, colour variables
# Globals set:  IP, page, f_results, f_scraped_html, uniq_hosts and the status
#   variables displayed by display_progress
# Returns: 1 when the target cannot be resolved or the results file cannot be
#   created. Exits the script with status 1 when a page cannot be fetched.
function scrape_ip(){
  target="$1"
  page=1
  last_page_check=
  results_count=
  uniq_hosts=0
  single_page=
  results_removed=
  extra_status_update=
  declare -a scrape_results

  # Run of zeros that marks "N pages in a row without new results".
  local stop_pattern="${stop_sequence:-0}"

  # A full-path template works with both GNU and BSD mktemp and always
  # honours TMP_DIR (GNU "mktemp -p DIR -t" lets $TMPDIR override DIR).
  if ! f_results=$(mktemp "$TMP_DIR/bing-ip2hosts.tmp.XXXXXX"); then
    echo "Cannot create a temporary file in $TMP_DIR." >&2
    return 1
  fi

  validate_target "$target"
  IP="$ret"

  if [[ -z "$IP" ]]; then
    return 1
  fi

  while true; do
    url="https://www.bing.com/search?q=ip%3A$IP+.&qs=n&first=$((page-1))0&FORM=PERE&setlang=$BING_SETLANG&setmkt=$BING_SETMKT"
    urlpath=$(echo "$url" | cut -d '/' -f 4-)

    if [[ $page == 1 ]] && [[ $QUIET == 0 ]]; then
      display_progress
    fi

    if ! f_scraped_html=$(mktemp "$TMP_DIR/bing-ip2hosts.tmp.XXXXXX"); then
      echo "Cannot create a temporary file in $TMP_DIR." >&2
      exit 1
    fi

    if ! wget --quiet --no-check-certificate --user-agent "$USER_AGENT" --output-document "$f_scraped_html" "$url"; then
      echo -e "\n${R}wget failed to scrape $url${N}" >&2
      if [[ $DEBUG == 0 ]]; then
        rm -f "$f_scraped_html"
      fi
      exit 1
    fi

    # Pull the status fields and result links out of the downloaded page.
    last_page_check=$(grep -E -o '<span class="sb_count">[0-9]+-([0-9]+) of (\1) results' "$f_scraped_html")
    results_count=$(grep -E -o '<span class="sb_count">[^<]+' "$f_scraped_html" |cut -d '>' -f 2|cut -d ' ' -f 1-3)
    single_page=$(grep -E -o '<span class="sb_count">[0-9] results' "$f_scraped_html")
    results_removed=$(grep -E -o '>Some results have been removed<' "$f_scraped_html")
    page_title=$(grep -E -o '<title>([^<]+)' "$f_scraped_html" | cut -d '>' -f 2)
    paginated_current_link=$(grep -E -o "class=\"sb_pagS[^>]+sb_bp\">([0-9]+)" "$f_scraped_html" | cut -d '>' -f 2)
    paginated_links=$(grep -E -o "<h4 class=\"b_hide\">Pagination</h4>.*?/nav" "$f_scraped_html" | grep -E -o ">([0-9\.]+)</a" | grep -E -o "[0-9\.]+" | tr '\n' ' ')

    results=$(grep -E -o "<h2><a href=\"[^\"]+" "$f_scraped_html" | cut -d '"' -f 2)

    if [[ $DISPLAY_URL_PREFIX == 1 ]]; then
      vhosts=$(echo "$results" | cut -d '/' -f 1-3)
    else
      vhosts=$(echo "$results" | cut -d '/' -f 3)
    fi

    # tr works on every Bash version, including the old one shipped with macOS
    vhosts=$(printf '%s' "$vhosts" | tr '[:upper:]' '[:lower:]')

    # Store only non-empty lines: a blank line in the results file would be
    # counted as a host and reported as a "new" result for an empty page.
    if [[ -n "$vhosts" ]]; then
      printf '%s\n' "$vhosts" | grep -v '^[[:space:]]*$' >> "$f_results"
    fi

    old_uniq_hosts=$uniq_hosts
    uniq_hosts=$(sort -u < "$f_results" | wc -l | tr -d ' ')

    # how many new results did we get
    new_results=$(( uniq_hosts - old_uniq_hosts ))
    scrape_results+=("$new_results")

    if [[ $QUIET == 0 ]]; then
      extra_status_update=""
      if [[ -n "$results_removed" ]]; then
        extra_status_update="${R}Some results have been removed"
      fi

      display_progress
    fi

    # clean up tmp files
    if [[ $DEBUG == 0 ]]; then
      rm -f "$f_scraped_html"
    fi

    # check end conditions
    if [[ -n "$last_page_check" ]]; then
      if [[ $QUIET == 0 ]]; then
        echo -e "\n${R}Stopping. This is the last page of results." >&2
      fi
      break
    fi

    if [[ -z "$results_count" ]]; then
      if [[ $QUIET == 0 ]]; then
        echo -e "\n${R}Stopping. The search results count is missing." >&2
      fi
      break
    fi

    if [[ -n "$single_page" ]]; then
      if [[ $QUIET == 0 ]]; then
        echo -e "\n${R}Stopping. Returned only one page of results." >&2
      fi
      break
    fi

    # Compare whole numbers only: padding both sides with spaces stops the
    # trailing zero of a count such as "10" from being taken for an empty page.
    if [[ " ${scrape_results[*]} " == *" ${stop_pattern} "* ]]; then
      if [[ $QUIET == 0 ]]; then
        echo -e "\n${R}Stopping after scraping $STOP_AFTER_PAGES pages without any new results.$N" >&2
      fi
      break
    fi

    (( page += 1 ))
  done
}

# Build the space-separated run of zeros that scrape_ip looks for in its
# per-page "new results" history.
# Arguments: $1 - how many zeros to emit
# Outputs:   "0", "0 0", "0 0 0", ... on stdout. A count below 1 or a
#            non-numeric argument yields a single "0".
function repeat_zero() {
  local how_many="$1"
  local str="0"
  local i   # keep the counter local; the caller has its own "i" loop variable

  if [[ ! "$how_many" =~ ^[0-9]+$ ]]; then
    how_many=1
  fi

  # 10# forces base 10 so a value such as "08" is not parsed as octal
  for ((i = 1; i < 10#$how_many; i++)); do str+=" 0"; done
  echo "$str"
}

# Show the banner and the option summary, run the dependency check, then
# exit with status 1.
# Globals read: STOP_AFTER_PAGES and BING_SETLANG (shown as defaults)
function usage()
{
  local term_lines

  clear
  banner

  term_lines=$(tput lines)
  if [[ $term_lines -ge 24 ]] && [[ $term_lines -le 26 ]]; then
    # The banner fills a default 80x24 terminal, so pause before printing
    # the help text; otherwise the banner would scroll away unseen.
    echo -n "[Press Enter]"
    read -r
  else
    echo
  fi

echo -e "bing-ip2hosts is a Bing.com web scraper that discovers websites by IP address.
Use for OSINT and discovering attack-surface of penetration test targets.

Usage: $0 [OPTIONS] IP|hostname

OPTIONS are:
-o FILE\tOutput hostnames to FILE.
-i FILE\tInput list of IP addresses or hostnames from FILE.
-n NUM\tStop after NUM scraped pages return no new results (Default: $STOP_AFTER_PAGES).
-l LANG\tSelect the language for use in the setlang parameter (Default: $BING_SETLANG).
-m MKT\tSelect the market for use in the setmkt parameter (Default is unset).
-u\tOnly display hostnames. Default is to include URL prefixes.
-c\tCSV output. Outputs the IP and hostname on each line, separated by a comma.
-q\tQuiet. Disable output except for final results.
-t DIR\tUse this directory instead of /tmp.
-V\tDisplay the version number of bing-ip2hosts and exit.
"
  dependency_check
  exit 1
}

# No arguments at all, or help requested as the first argument: print the
# usage text (usage never returns, it exits the script).
case "$1" in
  ""|-h|--help) usage ;;
esac

dependency_check

# Parse the command-line options. Whatever remains after the options is left
# in the positional parameters, the first one being the target.
while getopts "i:o:n:l:m:ucqt:Vh" optionName; do
  case "$optionName" in
    i) INPUT_FILE=$OPTARG;;
    o) OUTPUT_FILE=$OPTARG;;
    n)
      # The page limit is used in shell arithmetic later on, so accept
      # nothing but a plain non-negative integer.
      if [[ ! "$OPTARG" =~ ^[0-9]+$ ]]; then
        echo "Invalid value for -n: '$OPTARG' is not a number." >&2
        exit 1
      fi
      STOP_AFTER_PAGES=$OPTARG;;
    l) BING_SETLANG="$OPTARG";;
    m) BING_SETMKT="$OPTARG";;
    u) DISPLAY_URL_PREFIX=0;;
    c) OUTPUT_IP=1;;
    q) QUIET=1;;
    t) TMP_DIR="$OPTARG";;
    V) echo "bing-ip2hosts v$VERSION"; exit;;
    h) usage;;
    # getopts has already printed which option was wrong; point to the help.
    \?) echo "Error: invalid option or missing argument. Run '$0 -h' for usage." >&2; exit 1;;
  esac
done

shift $((OPTIND -1))

# A target must come from the command line or from an input file.
if [[ -z "$1" && -z "$INPUT_FILE" ]]; then
  echo "Missing IP address or hostname." >&2
  exit 1
fi

# The input file, when given, has to be readable.
if [[ -n "$INPUT_FILE" && ! -r "$INPUT_FILE" ]]; then
  echo "Cannot read from $INPUT_FILE." >&2
  exit 1
fi

# initialize outputfile
if [[ -n "$OUTPUT_FILE" ]]; then
  touch "$OUTPUT_FILE"
  [[ -w "$OUTPUT_FILE" ]] || {
    echo "Cannot write to $OUTPUT_FILE." >&2
    exit 1
  }
fi

# Create TMP_DIR if necessary (verbosely unless running quiet)
if [[ ! -d "$TMP_DIR" ]]; then
  if [[ "$QUIET" == 0 ]]; then
    mkdir -v -p "$TMP_DIR"
  else
    mkdir -p "$TMP_DIR"
  fi || {
    echo "Invalid TMP Directory: $TMP_DIR." >&2
    exit 1
  }
fi

# Start from an empty screen before the progress display takes over.
[[ $QUIET == 0 ]] && clear

# Sequence of zeros that scrape_ip looks for to detect STOP_AFTER_PAGES
# consecutive pages without new results.
stop_sequence=$(repeat_zero "$STOP_AFTER_PAGES")

if [[ -n "$INPUT_FILE" ]]; then

  declare -a result_files
  declare -a result_ips
  declare -a result_pages

  # The list is read on file descriptor 3 so that nothing run inside the loop
  # can accidentally consume it from stdin. The "|| [[ -n ... ]]" part keeps
  # a final line that is not terminated by a newline.
  while read -r -u 3 target || [[ -n "$target" ]]
  do
    # Drop trailing whitespace, including the carriage return of CRLF files.
    target="${target%"${target##*[![:space:]]}"}"

    # Blank lines are not targets.
    if [[ -z "$target" ]]; then
      continue
    fi

    scrape_ip "$target"
    # remember where this target's results are so they can be reported later
    result_files+=("$f_results")
    result_ips+=("$IP")
    result_pages+=("$page")
  done 3< "$INPUT_FILE"

  # report on each result
  for (( i=0; i < ${#result_files[@]}; i++)); do
    f_results=${result_files[i]}
    IP=${result_ips[i]}
    page=${result_pages[i]}
    show_hosts
  done

else
  target="$1"
  scrape_ip "$target"
  show_hosts
fi
