#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

TIKA_SCRIPT="$0"
verbose=false
THIS_OS=$(uname -s)

# Minimum major Java version needed to run this version of Tika.
JAVA_VER_REQ="8"

stop_all=false

# Cygwin is rejected up front: it lacks lsof, "ps auxww" and curl, and its
# directory handling is awkward, all of which this script relies on.
case "$THIS_OS" in
  CYGWIN*)
    echo -e "This script does not support cygwin due to severe limitations and lack of adherence\nto BASH standards, such as lack of lsof, curl, and ps options."
    exit 1
    ;;
esac

# Follow symlinks until TIKA_SCRIPT names the real script file.
while [ -h "$TIKA_SCRIPT" ]; do
  ls=$(ls -ld "$TIKA_SCRIPT")
  # Keep only the link target, i.e. the text after "-> "
  link=$(expr "$ls" : '.*-> \(.*\)$')
  case "$link" in
    /*)
      # absolute target: use it as-is
      TIKA_SCRIPT="$link"
      ;;
    *)
      # relative target: interpret it relative to the link's directory
      TIKA_SCRIPT="$(dirname "$TIKA_SCRIPT")/$link"
      ;;
  esac
done

# TIKA_TIP is the absolute path of the directory above the script's directory;
# it doubles (with a trailing slash) as the default server directory.
TIKA_TIP=$(cd "$(dirname "$TIKA_SCRIPT")/.."; pwd)
DEFAULT_SERVER_DIR="$TIKA_TIP/"

# Source an optional settings file. An include named via TIKA_INCLUDE wins
# (and is skipped silently when unreadable); otherwise the first readable
# file from the list below is sourced and remembered in TIKA_INCLUDE.
if [ -n "$TIKA_INCLUDE" ]; then
  if [ -r "$TIKA_INCLUDE" ]; then
    . "$TIKA_INCLUDE"
  fi
else
  # Locations are probed in this order.
  for include in \
      "$(dirname "$0")/tika.in.sh" \
      "$HOME/.tika.in.sh" \
      /usr/share/tika/tika.in.sh \
      /usr/local/share/tika/tika.in.sh \
      /etc/default/tika.in.sh \
      /opt/tika/tika.in.sh
  do
    [ -r "$include" ] || continue
    TIKA_INCLUDE="$include"
    . "$include"
    break
  done
fi

# Pid files live in <tip>/bin unless the environment or include says otherwise.
: "${TIKA_PID_DIR:=$TIKA_TIP/bin}"





echo "Default server $DEFAULT_SERVER_DIR"

# Pick the java executable: TIKA_JAVA_HOME takes precedence, then JAVA_HOME
# (probing bin/amd64/java before bin/java), and finally plain "java" from PATH.
if [ -n "$TIKA_JAVA_HOME" ]; then
  JAVA="$TIKA_JAVA_HOME/bin/java"
elif [ -z "$JAVA_HOME" ]; then
  JAVA=java
else
  for java in "$JAVA_HOME/bin/amd64/java" "$JAVA_HOME/bin/java"; do
    if [ -x "$java" ]; then
      JAVA="$java"
      break
    fi
  done
  if [ -z "$JAVA" ]; then
    printf '%s\n' \
      "The currently defined JAVA_HOME ($JAVA_HOME) refers" \
      "to a location where Java could not be found.  Aborting." \
      "Either fix the JAVA_HOME variable or remove it from the" \
      "environment so that the system PATH will be searched." >&2
    exit 1
  fi
fi

# Default wait budget in seconds (start_tika uses it while waiting for the port).
: "${TIKA_STOP_WAIT:=180}"
# Confirm that $JAVA can be executed and reports a new enough major version.
if ! JAVA_VER=$("$JAVA" -version 2>&1); then
  printf '%s\n' \
    "Java not found, or an error was encountered when running java." \
    "A working Java $JAVA_VER_REQ JRE is required to run Tika!" \
    "Please install latest version of Java $JAVA_VER_REQ or set JAVA_HOME properly." \
    "Command that we tried: '${JAVA} -version', with response:" \
    "${JAVA_VER}" \
    "" \
    "Debug information:" \
    "JAVA_HOME: ${JAVA_HOME:-N/A}" \
    "Active Path:" \
    "${PATH}" >&2
  exit 1
fi

# Extract the major version from the quoted version string: a leading "1."
# is dropped (1.8.0_x -> 8) and everything from the first '.', '_' or '-'
# onwards is cut off (11.0.2 -> 11).
JAVA_VER_NUM=$(echo $JAVA_VER | head -1 | awk -F '"' '/version/ {print $2}' | sed -e 's/^1\.//' -e 's/[._-].*$//')
if [[ "$JAVA_VER_NUM" -lt "$JAVA_VER_REQ" ]]; then
  printf '%s\n' \
    "Your current version of Java is too old to run this version of Tika." \
    "We found major version $JAVA_VER_NUM, using command '${JAVA} -version', with response:" \
    "${JAVA_VER}" \
    "" \
    "Please install latest version of Java $JAVA_VER_REQ or set JAVA_HOME properly." \
    "" \
    "Debug information:" \
    "JAVA_HOME: ${JAVA_HOME:-N/A}" \
    "Active Path:" \
    "${PATH}" >&2
  exit 1
fi

# Vendor is "Oracle" unless the version banner mentions IBM J9.
JAVA_VENDOR="Oracle"
if echo $JAVA_VER | grep -qi "IBM J9"; then
  JAVA_VENDOR="IBM J9"
fi


# Prints usage information to stdout.
#
# Arguments:
#   $1 - command to describe: "" (general usage), "start", "stop" or
#        "status"; any other value prints no usage text
#   $2 - optional error message, printed first as "ERROR: <message>"
function print_usage() {
  # local so that printing help does not leak generic names into the script
  local CMD="$1"
  local ERROR_MSG="$2"

  if [ -n "$ERROR_MSG" ]; then
    # %s keeps backslashes in user-supplied arguments literal
    printf '\nERROR: %s\n\n' "$ERROR_MSG"
  fi

  case "$CMD" in
    "")
      printf '%s\n' \
        "" \
        "Usage: tika COMMAND OPTIONS" \
        "       where COMMAND is one of: start, stop, status" \
        "" \
        "  Standalone server example (start Tika running in the background on port 9998):" \
        "" \
        "    ./tika start -p 9998" \
        "" \
        "Pass -help after any COMMAND to see command-specific usage information," \
        "  such as:    ./tika start -help or ./tika stop -help" \
        ""
      ;;
    start)
      printf '%s\n' \
        "" \
        "Usage: tika start [-f] [-h hostname] [-p port] [-d directory] [-j jar] [-V]" \
        "" \
        "  -f            Start Tika in foreground; default starts Tika in the background" \
        "                  and sends stdout / stderr to tika-PORT-console.log" \
        "" \
        "  -h <host>     Specify the host the Tika HTTP listener binds to; default is 0.0.0.0" \
        "" \
        "  -p <port>     Specify the port to start the Tika HTTP listener on; default is 9998" \
        "" \
        "  -d            Specify the Tika server directory; defaults to ../" \
        "" \
        "  -j/--jar      Specify the tika-server.jar; defaults to tika-server.jar" \
        "" \
        "  -V/--verbose   Verbose messages from this script" \
        ""
      ;;
    stop)
      printf '%s\n' \
        "" \
        "Usage: tika stop [-p port] [--all] [-V]" \
        "" \
        "  -p <port>     Specify the port the Tika HTTP listener is bound to" \
        "" \
        "  --all          Find and stop all running Tika servers on this host" \
        "" \
        "  -V/--verbose   Verbose messages from this script" \
        "" \
        "  NOTE: To see if any Tika servers are running, do: tika status" \
        ""
      ;;
    status)
      printf '%s\n' \
        "" \
        "Usage: tika status" \
        "" \
        "  This command will show the status of all running Tika servers." \
        "  It can only detect those Tika servers running on the current host." \
        ""
      ;;
  esac
} # end print_usage

# Draws a rotating "[x]" marker while process $1 is still listed by ps, so
# the user can see the script is alive while it waits for work to finish.
function spinner() {
  local watched_pid=$1
  local pause=0.5
  local frames='|/-\'
  local rest
  until [ -z "$(ps aux | awk '{print $2}' | grep -w $watched_pid)" ]; do
    # show the first frame character, then rotate it to the end of the string
    printf " [%c]  " "$frames"
    rest=${frames#?}
    frames=$rest${frames%"$rest"}
    sleep $pause
    # back up over the six characters just printed
    printf "\b\b\b\b\b\b"
  done
  # blank out whatever is left of the marker
  printf "    \b\b\b\b"
}

# Succeeds when process id $1 appears in the PID column of "ps auxww".
function _tika_pid_running() {
  ps auxww | awk '{print $2}' | grep -qw -- "$1"
}

# Stops a Tika process: sends SIGTERM first so the JVM can shut down cleanly,
# and escalates to SIGKILL (-9) only if the process is still listed after a
# short grace period.
#
# Arguments:
#   $1 - server directory (accepted for interface compatibility; not used)
#   $2 - PID of the process to stop
# Globals read:    TIKA_PORT (messages and pid-file name), TIKA_PID_DIR
# Globals written: TIKA_PID
# Exits the shell with status 1 when the PID is missing or not a positive
# integer, or when the process is still alive after SIGKILL.
function stop_tika() {

  TIKA_PID="$2"
  local grace_seconds=5
  local waited=0

  if [ -z "$TIKA_PID" ]; then
    echo "ERROR: No PID found for Tika running on port $TIKA_PORT ... script fails."
    exit 1
  fi
  # The PID usually comes from a pid file; refuse anything that is not a
  # positive integer so values such as "-1" or "0" can never reach kill.
  if ! [[ "$TIKA_PID" =~ ^[1-9][0-9]*$ ]]; then
    echo "ERROR: Invalid PID '$TIKA_PID' for Tika running on port $TIKA_PORT ... script fails."
    exit 1
  fi

  echo "Sending terminate command to Tika running on port $TIKA_PORT with process $TIKA_PID"
  kill "$TIKA_PID"
  while [ "$waited" -lt "$grace_seconds" ] && _tika_pid_running "$TIKA_PID"; do
    sleep 1
    waited=$((waited + 1))
  done

  if _tika_pid_running "$TIKA_PID"; then
    # graceful shutdown did not finish in time: force it
    kill -9 "$TIKA_PID"
    sleep 1
  fi
  rm -f "$TIKA_PID_DIR/tika-$TIKA_PORT.pid"

  if _tika_pid_running "$TIKA_PID"; then
    echo "ERROR: Failed to terminate previous Tika Java process $TIKA_PID ... script fails."
    exit 1
  fi
} # end stop_tika


# no args - just show usage and exit
if [ $# -eq 0 ]; then
  print_usage ""
  exit
fi

# A lone help or info argument is answered right away.
if [ $# -eq 1 ]; then
  case "$1" in
    -help|-usage|-h|--help)
      print_usage ""
      exit
      ;;
    -info|-i|status)
      #get_info
      echo "To be done"
      exit $?
      ;;
  esac
fi

# If the first arg starts with a dash (and it's not -help or -info), assume
# they are starting Tika, such as: tika -f
# Otherwise the first arg is the command and is consumed here.
case "$1" in
  -*)
    SCRIPT_CMD="start"
    ;;
  *)
    SCRIPT_CMD="$1"
    shift
    ;;
esac

# verify the command given is supported
case "$SCRIPT_CMD" in
  start|stop)
    ;;
  *)
    print_usage "" "$SCRIPT_CMD is not a valid command!"
    exit 1
    ;;
esac


# Run in foreground (default is to run in the background)
FG="false"

# Parse the options that follow the command. Every option that takes a value
# checks that the value is present and does not look like another option
# before calling "shift 2": when fewer than two arguments remain, "shift 2"
# fails without shifting and this loop would otherwise never terminate.
if [ $# -gt 0 ]; then
  while true; do
    case "$1" in
      -d|-dir)
        if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
          print_usage "$SCRIPT_CMD" "Server directory is required when using the $1 option!"
          exit 1
        fi

        if [[ "$2" == "." || "$2" == "./" || "$2" == ".." || "$2" == "../" ]]; then
          TIKA_SERVER_DIR="$(pwd)/$2"
        else
          # see if the arg value is relative to the tip vs full path
          if [[ "$2" != /* ]] && [[ -d "$TIKA_TIP/$2" ]]; then
            TIKA_SERVER_DIR="$TIKA_TIP/$2"
          else
            TIKA_SERVER_DIR="$2"
          fi
        fi
        # resolve it to an absolute path; fail loudly instead of silently
        # falling back to the current directory when it cannot be entered
        if ! abs_dir="$(cd "$TIKA_SERVER_DIR" 2>/dev/null && pwd)"; then
          printf '\nTika server directory %s not found!\n\n' "$TIKA_SERVER_DIR"
          exit 1
        fi
        TIKA_SERVER_DIR="$abs_dir"
        shift 2
        ;;
      -f|-foreground)
        FG="true"
        shift
        ;;
      -p|-port)
        if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
          print_usage "$SCRIPT_CMD" "Port number is required when using the $1 option!"
          exit 1
        fi
        TIKA_PORT="$2"
        shift 2
        ;;
      -h|-host)
        if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
          print_usage "$SCRIPT_CMD" "Hostname is required when using the $1 option!"
          exit 1
        fi
        TIKA_HOST="$2"
        shift 2
        ;;
      -j|--jar)
        if [[ -z "$2" || "${2:0:1}" == "-" ]]; then
          print_usage "$SCRIPT_CMD" "Jar file name is required when using the $1 option!"
          exit 1
        fi
        TIKA_SERVER_JAR="$2"
        shift 2
        ;;
      -help|-usage)
        print_usage "$SCRIPT_CMD"
        exit 0
        ;;
      -V|--verbose)
        verbose=true
        shift
        ;;
      --all)
        stop_all=true
        shift
        ;;
      --)
        shift
        break
        ;;
      *)
        if [ "${1:0:2}" == "-D" ]; then
          # pass thru any opts that begin with -D (java system props)
          TIKA_OPTS+=("$1")
          # NOTE: PASS_TO_RUN_EXAMPLE is not read anywhere else in this script
          PASS_TO_RUN_EXAMPLE+=" $1"
          shift
        elif [ "$1" != "" ]; then
          print_usage "$SCRIPT_CMD" "$1 is not supported by this script"
          exit 1
        else
          break # out-of-args, stop looping
        fi
        ;;
    esac
  done
fi

# Use the default server directory when -d was not given.
[ -n "$TIKA_SERVER_DIR" ] || TIKA_SERVER_DIR="$DEFAULT_SERVER_DIR"

if ! [ -e "$TIKA_SERVER_DIR" ]; then
  echo -e "\nTika server directory $TIKA_SERVER_DIR not found!\n"
  exit 1
fi

############# start/stop logic below here ################

# In verbose mode, report the installation root and the Java being used.
if $verbose; then
  echo "Using Tika root directory: $TIKA_TIP"
  echo "Using Java: $JAVA"
  "$JAVA" -version
fi

# Handle the "stop" command, then exit.
#   --all       stop every server that has a tika-PORT.pid file in TIKA_PID_DIR
#   -p PORT     stop the server whose pid file is tika-PORT.pid
#   (neither)   stop the server only if exactly one pid file exists
if [[ "$SCRIPT_CMD" == "stop" ]]; then
  if $stop_all; then
    none_stopped=true
    # The loop is fed through process substitution rather than a pipe so it
    # runs in this shell and the none_stopped update survives the loop.
    while IFS= read -r PIDF; do
      NEXT_PID=$(cat "$PIDF")
      # Take the port from the pid file name (tika-PORT.pid) so that
      # stop_tika reports and cleans up the right server.
      TIKA_PORT="${PIDF##*/tika-}"
      TIKA_PORT="${TIKA_PORT%.pid}"
      echo "About to stop Tika on port $TIKA_PORT with process $NEXT_PID"
      stop_tika "$TIKA_SERVER_DIR" "$NEXT_PID"
      none_stopped=false
      rm -f "$PIDF"
    done < <(find "$TIKA_PID_DIR" -name "tika-*.pid" -type f)
    if $none_stopped; then
      echo -e "\nNo Tika servers found to stop.\n"
    fi
  else
    # not stopping all; without a port we only act when exactly one pid file exists
    none_stopped=true
    numTikas=$(find "$TIKA_PID_DIR" -name "tika-*.pid" -type f | wc -l | tr -d ' ')
    if [ -z "$TIKA_PORT" ]; then
      if [ "$numTikas" -eq 1 ]; then
        # only do this if there is only 1 server running, otherwise they must provide the -p or --all
        PIDF=$(find "$TIKA_PID_DIR" -name "tika-*.pid" -type f)
        PID=$(cat "$PIDF")
        CHECK_PID=""
        if [ -n "$PID" ]; then
          CHECK_PID=$(ps auxww | awk '{print $2}' | grep -w -- "$PID" | sort -r | tr -d ' ')
        fi
        if [ "$CHECK_PID" != "" ]; then
          # Take the port from the pid file name so stop_tika prints it and
          # the pid file is removed instead of being left behind.
          TIKA_PORT="${PIDF##*/tika-}"
          TIKA_PORT="${TIKA_PORT%.pid}"
          stop_tika "$TIKA_SERVER_DIR" "$CHECK_PID"
          rm -f "$PIDF"
          none_stopped=false
        fi
      fi
    else
      PIDF=$(find "$TIKA_PID_DIR" -name "tika-$TIKA_PORT.pid" -type f)
      PID=""
      if [ -n "$PIDF" ]; then
        PID=$(cat "$PIDF")
      fi
      # stop_tika reports the error and exits when no PID was found
      stop_tika "$TIKA_SERVER_DIR" "$PID"
      none_stopped=false
    fi

    if $none_stopped; then
      if [ "$numTikas" -gt 0 ]; then
        echo -e "\nFound $numTikas Tika servers running! Must either specify a port using -p or --all to stop all Tika servers on this host.\n"
      else
        echo -e "\nNo Tika servers found to stop.\n"
      fi
      exit 1
    fi
  fi
  exit
fi


# Defaults for the start command; values that are already set (and non-empty)
# from the command line, the environment or the include file are kept.
: "${TIKA_PORT:=9998}"
: "${TIKA_HOST:=0.0.0.0}"
: "${TIKA_SERVER_JAR:=tika-server.jar}"
: "${TIKA_LOGS_DIR:=$TIKA_SERVER_DIR/logs}"

# Launches Tika in foreground/background depending on parameters.
#
# Arguments:
#   $1 - "true" to exec Java in the foreground; any other value starts it in
#        the background under nohup
#   $2 - additional arguments placed before -jar (word-split on purpose)
# Globals read:    verbose, JAVA, TIKA_SERVER_DIR, TIKA_SERVER_JAR, TIKA_HOST,
#                  TIKA_PORT, TIKA_LOGS_DIR, TIKA_PID_DIR, TIKA_STOP_WAIT,
#                  TIKA_FORKED_OPTS, TIKA_OPTS, TIKA_DATA_HOME, JAVA_MEM_OPTS,
#                  GC_TUNE, GC_LOG_OPTS, TIKA_HOST_ARG, LOG4J_CONFIG
# Globals written: GC_TUNE, TIKA_ADDL_ARGS, TIKA_START_OPTS, TIKA_PID
# Returns (background mode with lsof): 0 once the port is listening, non-zero
# when nothing was listening after TIKA_STOP_WAIT seconds.
# Exits the shell with status 1 when the server directory, the jar or the
# logs directory is unusable.
function start_tika() {

  local run_in_foreground="$1"
  TIKA_ADDL_ARGS="$2"
  local wait_pid

  # define default GC_TUNE
  if [ -z ${GC_TUNE+x} ]; then
    GC_TUNE=('-XX:+UseG1GC' \
      '-XX:+PerfDisableSharedMem' \
      '-XX:+ParallelRefProcEnabled' \
      '-XX:MaxGCPauseMillis=250' \
      '-XX:+UseLargePages' \
      '-XX:+AlwaysPreTouch')
  else
    # Deliberately unquoted: split a string-valued GC_TUNE into separate
    # options. Expanding [@] keeps every element when GC_TUNE is already an
    # array instead of reducing it to its first element.
    GC_TUNE=(${GC_TUNE[@]})
  fi

  if $verbose ; then
    echo -e "\nStarting Tika using the following settings:"
    echo -e "    JAVA            = $JAVA"
    echo -e "    TIKA_SERVER_DIR = $TIKA_SERVER_DIR"
    echo -e "    TIKA_SERVER_JAR = $TIKA_SERVER_JAR"
    echo -e "    TIKA_HOST       = $TIKA_HOST"
    echo -e "    TIKA_PORT       = $TIKA_PORT"
    echo -e "    JAVA_MEM_OPTS   = ${JAVA_MEM_OPTS[*]}"
    echo -e "    GC_TUNE         = ${GC_TUNE[*]}"
    echo -e "    GC_LOG_OPTS     = ${GC_LOG_OPTS[*]}"
    echo -e "    TIKA_FORKED_OPTS = $TIKA_FORKED_OPTS"

    if [ -n "${TIKA_OPTS[*]}" ]; then
      echo -e "    TIKA_OPTS       = ${TIKA_OPTS[*]}"
    fi

    if [ "$TIKA_ADDL_ARGS" != "" ]; then
      echo -e "    TIKA_ADDL_ARGS  = $TIKA_ADDL_ARGS"
    fi

    if [ "$TIKA_DATA_HOME" != "" ]; then
      echo -e "    TIKA_DATA_HOME  = $TIKA_DATA_HOME"
    fi

    echo -e "\n"
  fi

  # need to launch tika from the server dir
  if ! cd "$TIKA_SERVER_DIR"; then
    echo -e "\nERROR: Cannot change to Tika server directory $TIKA_SERVER_DIR\n"
    exit 1
  fi
  if [ ! -e "$TIKA_SERVER_DIR/$TIKA_SERVER_JAR" ]; then
    echo -e "\nERROR: $TIKA_SERVER_JAR file not found in $TIKA_SERVER_DIR\nPlease check your -d and -j parameters to set the correct Tika server directory and jar.\n"
    exit 1
  fi

  TIKA_START_OPTS=('-server' "${JAVA_MEM_OPTS[@]}" "${GC_TUNE[@]}" "${GC_LOG_OPTS[@]}" \
    "${TIKA_HOST_ARG[@]}" \
    "${LOG4J_CONFIG[@]}" "${TIKA_OPTS[@]}")

  if ! mkdir -p "$TIKA_LOGS_DIR" 2>/dev/null; then
    echo -e "\nERROR: Logs directory $TIKA_LOGS_DIR could not be created. Exiting"
    exit 1
  fi
  if [ ! -w "$TIKA_LOGS_DIR" ]; then
    echo -e "\nERROR: Logs directory $TIKA_LOGS_DIR is not writable. Exiting"
    exit 1
  fi
  case "$TIKA_LOGS_DIR" in
    contexts|etc|lib|modules|resources|scripts|tika|tika-webapp)
      echo -e "\nERROR: Logs directory $TIKA_LOGS_DIR is invalid. Reserved for the system. Exiting"
      exit 1
      ;;
  esac

  # TIKA_ADDL_ARGS and TIKA_FORKED_OPTS are plain strings and are left
  # unquoted below so that they split into individual arguments.
  if [ "$run_in_foreground" == "true" ]; then
    if $verbose ; then
      echo "Startup command"
      echo "$JAVA ${TIKA_START_OPTS[*]} $TIKA_ADDL_ARGS -jar $TIKA_SERVER_JAR -p $TIKA_PORT -h $TIKA_HOST $TIKA_FORKED_OPTS"
    fi
    exec "$JAVA" "${TIKA_START_OPTS[@]}" $TIKA_ADDL_ARGS \
      -jar "$TIKA_SERVER_JAR" -p "$TIKA_PORT" -h "$TIKA_HOST" $TIKA_FORKED_OPTS
  else
    # run Tika in the background
    if $verbose ; then
      echo "Startup command"
      echo "$JAVA ${TIKA_START_OPTS[*]} $TIKA_ADDL_ARGS -jar $TIKA_SERVER_JAR -p $TIKA_PORT -h $TIKA_HOST $TIKA_FORKED_OPTS $TIKA_LOGS_DIR/tika-$TIKA_PORT-console.log $TIKA_PID_DIR/tika-$TIKA_PORT.pid"
    fi
    nohup "$JAVA" "${TIKA_START_OPTS[@]}" $TIKA_ADDL_ARGS \
      -jar "$TIKA_SERVER_JAR" -p "$TIKA_PORT" -h "$TIKA_HOST" $TIKA_FORKED_OPTS \
      1>"$TIKA_LOGS_DIR/tika-$TIKA_PORT-console.log" 2>&1 &
    echo $! > "$TIKA_PID_DIR/tika-$TIKA_PORT.pid"

    # no lsof on cygwin though
    if hash lsof 2>/dev/null ; then  # hash returns true if lsof is on the path
      echo -n "Waiting up to $TIKA_STOP_WAIT seconds to see Tika running on port $TIKA_PORT"
      # Poll in a background subshell so the spinner can be shown meanwhile
      (
        loops=0
        while true; do
          running=$(lsof -PniTCP:"$TIKA_PORT" -sTCP:LISTEN)
          if [ -z "$running" ]; then
            slept=$((loops * 2))
            if [ "$slept" -lt "$TIKA_STOP_WAIT" ]; then
              sleep 2
              loops=$((loops + 1))
            else
              echo -e "Still not seeing Tika listening on $TIKA_PORT after $TIKA_STOP_WAIT seconds!"
              tail -30 "$TIKA_LOGS_DIR/tika-$TIKA_PORT-console.log"
              exit 1 # subshell! non-zero so the timeout reaches the caller
            fi
          else
            TIKA_PID=$(ps auxww | grep tika\-server | grep -w "\-p $TIKA_PORT" | grep -v grep | awk '{print $2}' | sort -r)
            echo -e "\nStarted Tika server on port $TIKA_PORT (pid=$TIKA_PID). Happy extracting!\n"
            exit 0 # subshell!
          fi
        done
      ) &
      wait_pid=$!
      spinner "$wait_pid"
      # collect the polling subshell's status: non-zero means the wait timed out
      wait "$wait_pid"
      return $?
    else
      echo -e "NOTE: Please install lsof as this script needs it to determine if Tika is listening on port $TIKA_PORT."
      sleep 10
      TIKA_PID=$(ps auxww | grep tika\-server | grep -w "\-p $TIKA_PORT" | grep -v grep | awk '{print $2}' | sort -r)
      echo -e "\nStarted Tika server on port $TIKA_PORT (pid=$TIKA_PID). Happy extracting!\n"
      return
    fi
  fi
} # end start_tika



# Handle the "start" command. The script's exit status is whatever start_tika
# returned, so a successful background start no longer reports failure.
# NOTE(review): ADDITIONAL_CMD_OPTS is not assigned anywhere in this script;
# presumably it comes from the environment or the include file - confirm.
if [[ "$SCRIPT_CMD" == "start" ]]; then
  start_tika "$FG" "$ADDITIONAL_CMD_OPTS"
  exit $?
fi
