#!/bin/sh
set -eu

# ============================================================
# Remote .NET Coverage Agent (POSIX sh)
# Alpine + Ubuntu compatible
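# Modes (selected with the MODE environment variable; default is auto):
#   MODE=auto     : stop processes matching APP_PATTERN (if set), then start `dotnet APP_PATH` under dotnet-coverage
#   MODE=manual   : no kill; start `dotnet APP_PATH` under dotnet-coverage
#   MODE=explicit : run STOP_CMD first, then start START_CMD under dotnet-coverage; `stop` also stops the app via STOP_CMD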
# ============================================================

# Print a timestamped message to stdout.
# Arguments: all arguments, joined with single spaces, form the message.
# printf is used rather than echo so that backslashes in a message (regex
# patterns, paths) are printed literally whichever /bin/sh runs the script.
log() { printf '[%s] %s\n' "$(date '+%F %T')" "$*"; }
# Succeed (exit status 0) when the shell can resolve the named command.
# Arguments: $1 - command name to look up
# Outputs:   nothing; the lookup output is discarded.
have() {
  command -v "$1" >/dev/null 2>&1
}

# Runtime configuration taken from the environment.
# MODE falls back to "auto" when unset or empty; COVERAGE_SETTINGS_FILE falls
# back to the empty string so later reads are safe under `set -u`.
: "${MODE:=auto}"
: "${COVERAGE_SETTINGS_FILE:=}"
# Determine the distribution identifier and store it in OS_ID.
# Globals: OS_ID (written) - the ID value from /etc/os-release, or "unknown"
#          when the file is missing, unreadable, or defines no ID.
# /etc/os-release is sourced inside a command substitution (a subshell) so the
# other variables it defines (NAME, VERSION, ...) cannot overwrite variables of
# this script or exported variables later inherited by the started application.
detect_os() {
  OS_ID="unknown"
  if [ -f /etc/os-release ]; then
    OS_ID="$(
      . /etc/os-release >/dev/null 2>&1
      printf '%s' "${ID:-unknown}"
    )" || OS_ID="unknown"
    [ -n "$OS_ID" ] || OS_ID="unknown"
  fi
}

# Best-effort installation of the helper packages used by this agent.
# Globals: OS_ID (written by detect_os, read here)
# Returns: 0 always; a failed install is logged as a warning and the script
#          carries on with whatever tools are already present.
# Package-manager output is still discarded, but a failure is no longer
# silent, so a missing ps/pgrep/pkill later on can be traced back to this step.
install_packages() {
  detect_os
  case "$OS_ID" in
    alpine)
      log "Detected Alpine Linux"
      if have apk; then
        apk add --no-cache \
          curl icu-libs libgcc libstdc++ \
          net-tools procps iproute2 >/dev/null 2>&1 ||
          log "[WARN] apk add failed; continuing with the tools already installed"
      fi
      ;;
    ubuntu|debian)
      log "Detected Ubuntu/Debian"
      if have apt-get; then
        apt-get update -y >/dev/null 2>&1 ||
          log "[WARN] apt-get update failed; continuing"
        apt-get install -y \
          curl net-tools procps iproute2 >/dev/null 2>&1 ||
          log "[WARN] apt-get install failed; continuing with the tools already installed"
      fi
      ;;
    *)
      log "[WARN] Unknown OS"
      ;;
  esac
}

# Put the tool directories at the front of PATH and export it.
# Globals: PATH (read, written, exported); HOME (read)
# - /tools always comes first, followed by $HOME/.dotnet/tools.
# - The per-user directory is skipped when HOME is unset or empty, instead of
#   aborting the whole script under `set -u`.
# - Calling the function again is a no-op when PATH already starts with the
#   same prefix, so repeated calls do not keep growing PATH.
set_tool_path() {
  tool_prefix="/tools"
  if [ -n "${HOME:-}" ]; then
    tool_prefix="$tool_prefix:$HOME/.dotnet/tools"
  fi
  case "${PATH:-}" in
    "$tool_prefix"|"$tool_prefix":*) ;;
    *) PATH="$tool_prefix${PATH:+:$PATH}" ;;
  esac
  export PATH
}

# Make sure the dotnet global tools used by the agent are installed.
# Returns: 1 when the dotnet CLI itself cannot be found, 0 otherwise.
# For each tool whose command is not yet resolvable, `dotnet tool install` is
# tried first and `dotnet tool update` second; failures of both are ignored
# (start_impl verifies dotnet-coverage separately afterwards).
install_dotnet_tools() {
  set_tool_path

  have dotnet || {
    log "[ERROR] dotnet not found – expected to be preinstalled"
    return 1
  }

  # Each entry is "<command name>:<global tool package id>".
  for tool_spec in \
    dotnet-coverage:dotnet-coverage \
    reportgenerator:dotnet-reportgenerator-globaltool
  do
    tool_cmd="${tool_spec%%:*}"
    tool_pkg="${tool_spec#*:}"

    if have "$tool_cmd"; then
      continue
    fi
    if dotnet tool install --global "$tool_pkg" >/dev/null 2>&1; then
      continue
    fi
    dotnet tool update --global "$tool_pkg" >/dev/null 2>&1 || true
  done

  return 0
}

# Create the output directory layout: <COVERAGE_OUTDIR>/logs and
# <COVERAGE_OUTDIR>/html (parents are created as needed).
# Globals: COVERAGE_OUTDIR (read)
mk_layout() {
  layout_root="$COVERAGE_OUTDIR"
  mkdir -p \
    "${layout_root}/logs" \
    "${layout_root}/html"
}

# Normalise COVERAGE_OUTDIR so the collector's --output path is absolute.
# Globals: COVERAGE_OUTDIR (read, written)
#   - unset or empty -> /tmp/coverage
#   - relative path  -> prefixed with the current working directory
#   - absolute path  -> left untouched
abs_outdir() {
  : "${COVERAGE_OUTDIR:=/tmp/coverage}"
  if [ "${COVERAGE_OUTDIR#/}" = "$COVERAGE_OUTDIR" ]; then
    # Stripping a leading "/" changed nothing, so the path is relative.
    COVERAGE_OUTDIR="$(pwd)/$COVERAGE_OUTDIR"
  fi
}

# Print the PIDs (one per line) of processes whose command line contains the
# given fixed string.
# Arguments: $1 - literal substring to look for (not a regex)
# Outputs:   matching PIDs on stdout; nothing for an empty pattern
# Notes:
#   - The pattern is handed to awk through the environment, so no process in
#     this pipeline carries it on its command line; this replaces the old
#     `grep -v grep` filter, which also hid unrelated processes with "grep" in
#     their arguments.
#   - A pattern starting with "-" is handled as data, not as an option.
#   - Only the command-line column is searched, not the PID column.
#   - The PID of this script ($$) is never reported.
find_pid_by_pattern() {
  pat="$1"
  [ -n "$pat" ] || return 0
  ps -eo pid,args | FIND_PID_PATTERN="$pat" FIND_PID_SELF="$$" awk '
    $1 ~ /^[0-9]+$/ && ($1 + 0) != (ENVIRON["FIND_PID_SELF"] + 0) {
      cmdline = $0
      sub(/^[ \t]*[0-9]+[ \t]+/, "", cmdline)
      if (index(cmdline, ENVIRON["FIND_PID_PATTERN"]) > 0) print $1
    }'
}

# Terminate every process whose command line contains the given fixed string.
# Arguments: $1 - literal pattern; an empty pattern is a no-op
# Globals:   PIDS (written) - the PIDs found for the pattern
# Behaviour: sends the default signal (TERM) to each PID, waits 3 seconds, then
#            sends KILL to any PID that is still alive.
# Liveness is probed with `kill -0`, as wait_pid_exit and the start/stop code
# do, so the check does not depend on which `ps` implementation is installed.
kill_existing_process() {
  pat="$1"
  [ -n "$pat" ] || return 0

  PIDS="$(find_pid_by_pattern "$pat" 2>/dev/null || true)"
  if [ -z "$PIDS" ]; then
    log "No existing process found for pattern [$pat]"
    return 0
  fi

  log "Found existing process(es) for pattern [$pat]: $PIDS"
  # $PIDS is intentionally unquoted: it is a whitespace-separated PID list.
  for p in $PIDS; do kill "$p" 2>/dev/null || true; done
  sleep 3
  for p in $PIDS; do
    if kill -0 "$p" 2>/dev/null; then
      log "Force killing PID $p"
      kill -9 "$p" 2>/dev/null || true
    fi
  done
}

# Stop all processes matching a pattern, gracefully first and forcefully last.
# Arguments: $1 - pattern; an empty pattern is a no-op
# Returns:   0 on the pkill path; without pkill, the status of
#            kill_existing_process.
# Flow with pkill available:
#   1. send TERM to every match of `pkill -f`;
#   2. with pgrep available, poll once per second for up to 15 seconds and
#      return as soon as nothing matches; otherwise just sleep 3 seconds;
#   3. send KILL to whatever still matches.
# Without pkill the work is delegated to kill_existing_process, which matches
# the pattern as a fixed string.
# "--" precedes the pattern in every pkill/pgrep call so a pattern starting
# with "-" (for example "--urls=...") is not parsed as an option.
kill_by_pattern_graceful() {
  pat="$1"
  [ -n "$pat" ] || return 0

  log "Stopping existing process(es) matching pattern: [$pat]"

  if ! have pkill; then
    kill_existing_process "$pat"
    return
  fi

  pkill -TERM -f -- "$pat" 2>/dev/null || true

  if have pgrep; then
    i=0
    while [ "$i" -lt 15 ]; do
      pgrep -f -- "$pat" >/dev/null 2>&1 || return 0
      sleep 1
      i=$((i + 1))
    done
    log "[WARN] Still running after TERM; force killing pattern [$pat]"
  else
    sleep 3
  fi

  pkill -KILL -f -- "$pat" 2>/dev/null || true
  return 0
}


# Wait until a regular file exists.
# Arguments: $1 - path to wait for
#            $2 - maximum number of seconds to wait (default 60)
# Returns:   0 as soon as the file exists, 1 when the budget is used up
# The file is checked before every sleep and once more after the last one, so
# a file that appears during the final second (or already exists when the
# budget is 0) is reported as present.
wait_for_file() {
  f="$1"
  max="${2:-60}"
  i=0
  while :; do
    [ -f "$f" ] && return 0
    [ "$i" -lt "$max" ] || return 1
    sleep 1
    i=$((i + 1))
  done
}

# Wait until a process no longer exists.
# Arguments: $1 - PID to watch
#            $2 - maximum number of seconds to wait (default 20)
# Returns:   0 as soon as `kill -0` fails for the PID, 1 when the budget is
#            used up while the process is still alive
# The process is probed before every sleep and once more after the last one,
# so an exit during the final second (or before a budget of 0) counts as
# success instead of triggering the caller's force-kill path.
wait_pid_exit() {
  pid="$1"
  max="${2:-20}"
  i=0
  while :; do
    kill -0 "$pid" 2>/dev/null || return 0
    [ "$i" -lt "$max" ] || return 1
    sleep 1
    i=$((i + 1))
  done
}

# Print the PID of the first child process of the given collector process.
# Arguments: $1 - collector PID
# Outputs:   one PID on stdout, or nothing when no child is found
# The first child is taken; per the original author it is the dotnet app in
# most cases. pgrep is preferred; without it the ps process table is scanned.
recover_app_pid_from_collector() {
  collector_pid="$1"

  if ! have pgrep; then
    ps -eo pid,ppid | awk -v parent="$collector_pid" '$2 == parent { print $1; exit }'
    return
  fi

  pgrep -P "$collector_pid" 2>/dev/null | head -n 1 || true
}

# Print the value following "--output" on the command line of a running
# collector process.
# Arguments: $1 - collector PID
# Outputs:   the output path on stdout, or nothing when the process is gone or
#            has no "--output <path>" pair
# The command line is split on whitespace, so a path containing spaces is
# reported only up to its first space.
collector_output_path() {
  coll_pid="$1"
  coll_args="$(ps -o args= -p "$coll_pid" 2>/dev/null || true)"
  echo "$coll_args" | awk '
    {
      field = 1
      # Stop one short of NF: "--output" as the very last word has no value.
      while (field < NF) {
        if ($field == "--output") { print $(field + 1); exit }
        field++
      }
    }'
}

# Run a user-supplied stop command through a login shell (`sh -lc`).
# Arguments: $1 - command line to run; an empty string is a no-op
# Returns:   0 always - a failing stop command is deliberately ignored
run_stop_cmd() {
  stop_line="$1"
  if [ -n "$stop_line" ]; then
    log "Stopping existing app via STOP_CMD: $stop_line"
    sh -lc "$stop_line" || true
  fi
}

# ================= START =================
# Start the application under `dotnet-coverage collect`.
# Globals read:    MODE, APP_PATH, APP_PATTERN, START_CMD, STOP_CMD,
#                  COVERAGE_OUTDIR, COVERAGE_SETTINGS_FILE
# Globals written: DOTNET_COVERAGE, COLL_PID, APP_PID, PATH (via set_tool_path)
# Files written:   <outdir>/collector.pid, <outdir>/app.pid (when detected),
#                  <outdir>/logs/app.out, <outdir>/logs/app.err
# Exits 1 on invalid configuration, missing tools, an unreachable application
# directory, or when the collector dies right after launch.
start_impl() {
  # Original author's note: these two settings were added to stop the
  # ND Collector if it is already running.
  export CORECLR_ENABLE_PROFILING=0
  export DOTNET_STARTUP_HOOKS=""

  abs_outdir

  # Validate the per-mode required variables before touching anything.
  case "$MODE" in
    auto|manual)
      [ -n "${APP_PATH:-}" ] || { log "[ERROR] APP_PATH required"; exit 1; }
      ;;
    explicit)
      [ -n "${START_CMD:-}" ] || { log "[ERROR] START_CMD required for MODE=explicit"; exit 1; }
      [ -n "${STOP_CMD:-}" ]  || { log "[ERROR] STOP_CMD required for MODE=explicit"; exit 1; }
      ;;
    *)
      log "[ERROR] Invalid MODE=$MODE"; exit 1 ;;
  esac

  install_packages
  install_dotnet_tools || exit 1
  set_tool_path

  DOTNET_COVERAGE="$(command -v dotnet-coverage 2>/dev/null || true)"
  [ -n "$DOTNET_COVERAGE" ] || {
    log "[ERROR] dotnet-coverage not found (PATH=$PATH)."
    exit 1
  }

  mk_layout

  # auto: stop anything matching APP_PATTERN. pkill is invoked directly with
  # the pattern as one quoted argument; building a shell command string from
  # the pattern would let quotes, "$" or backticks in it be re-interpreted.
  if [ "$MODE" = "auto" ] && [ -n "${APP_PATTERN:-}" ]; then
    log "Checking existing process using pattern: $APP_PATTERN"
    log "Stopping existing app via pkill -f -- $APP_PATTERN"
    pkill -f -- "$APP_PATTERN" || true
  fi

  # explicit: run STOP_CMD first, then optionally kill by pattern as fallback.
  if [ "$MODE" = "explicit" ]; then
    log "EXPLICIT: stopping existing app via STOP_CMD"
    sh -lc "$STOP_CMD" || true
    if [ -n "${APP_PATTERN:-}" ]; then
      log "EXPLICIT: fallback kill by pattern: $APP_PATTERN"
      kill_by_pattern_graceful "$APP_PATTERN" || true
    fi
  fi

  log "Cleaning old coverage files..."
  rm -f "$COVERAGE_OUTDIR"/coverage*.xml "$COVERAGE_OUTDIR"/coverage*.coverage 2>/dev/null || true
  rm -rf "$COVERAGE_OUTDIR/html" "$COVERAGE_OUTDIR/coverage-report" 2>/dev/null || true
  # Drop PID files left by a previous run: if the new app PID is not detected
  # below, a stale app.pid would otherwise make `stop` signal an unrelated PID.
  rm -f "$COVERAGE_OUTDIR/app.pid" "$COVERAGE_OUTDIR/collector.pid" 2>/dev/null || true
  mkdir -p "$COVERAGE_OUTDIR/html"

  if [ -n "${APP_PATH:-}" ]; then
    cd "$(dirname "$APP_PATH")" || {
      log "[ERROR] cannot change to application directory of APP_PATH=$APP_PATH"
      exit 1
    }
  fi

  # Build the collector argument list in the positional parameters so every
  # value stays a single argument even when it contains spaces.
  # Note: the settings file is tested after the cd above, so a relative
  # COVERAGE_SETTINGS_FILE is resolved from the application directory.
  set -- collect
  if [ -n "$COVERAGE_SETTINGS_FILE" ] && [ -f "$COVERAGE_SETTINGS_FILE" ]; then
    log "Using coverage settings: $COVERAGE_SETTINGS_FILE"
    set -- "$@" --settings "$COVERAGE_SETTINGS_FILE"
  else
    log "[WARN] coverage settings file not found (COVERAGE_SETTINGS_FILE=$COVERAGE_SETTINGS_FILE). Running without settings may use more memory."
  fi
  set -- "$@" \
    --output "$COVERAGE_OUTDIR/coverage.cobertura.xml" \
    --output-format cobertura
  if [ "$MODE" = "explicit" ]; then
    set -- "$@" -- sh -c "$START_CMD"
  else
    set -- "$@" dotnet "$(basename "$APP_PATH")"
  fi

  log "Starting application under dotnet-coverage..."
  nohup "$DOTNET_COVERAGE" "$@" \
    >"$COVERAGE_OUTDIR/logs/app.out" \
    2>"$COVERAGE_OUTDIR/logs/app.err" \
    </dev/null &

  COLL_PID=$!
  echo "$COLL_PID" >"$COVERAGE_OUTDIR/collector.pid"
  log "Collector PID=$COLL_PID"

  # Give the collector a moment, then fail fast if it has already died.
  sleep 1
  if ! kill -0 "$COLL_PID" 2>/dev/null; then
    log "[ERROR] dotnet-coverage exited immediately."
    log "---- app.err (last 200) ----"
    tail -n 200 "$COVERAGE_OUTDIR/logs/app.err" 2>/dev/null || true
    exit 1
  fi

  sleep 2

  # The app is started as a child of the collector; retry the lookup a few
  # times because the child may not exist yet.
  APP_PID=""
  for i in 1 2 3 4 5; do
    APP_PID="$(recover_app_pid_from_collector "$COLL_PID")"
    [ -n "$APP_PID" ] && break
    sleep 1
  done

  if [ -n "$APP_PID" ]; then
    echo "$APP_PID" >"$COVERAGE_OUTDIR/app.pid"
    log "Detected app PID=$APP_PID"
  else
    log "[WARN] App PID not detected (collector PID=$COLL_PID)"
  fi

  log "✅ Start done"
}

# ================= STOP =================
# Stop the application and the collector, then wait for the coverage XML.
# Globals read:    UUID, MODE, STOP_CMD, COVERAGE_OUTDIR
# Globals written: APP_PID, COLL_PID, REAL_OUT, EXPECTED, OUT_TO_WAIT
# Files:           reads and finally removes <outdir>/app.pid and
#                  <outdir>/collector.pid; ensures
#                  <outdir>/coverage.cobertura.xml exists on success
# Exits 1 when UUID is missing, STOP_CMD is missing or fails in explicit mode,
# the coverage XML never appears, or it cannot be placed at the expected path.
# NOTE(review): UUID is required but not otherwise used in this function -
# presumably the caller uses it to correlate runs; confirm.
stop_impl() {
  abs_outdir
  [ -n "${UUID:-}" ] || { log "[ERROR] UUID required"; exit 1; }

  set_tool_path

  APP_PID=""
  COLL_PID=""

  if [ -f "$COVERAGE_OUTDIR/app.pid" ]; then
    APP_PID="$(cat "$COVERAGE_OUTDIR/app.pid" 2>/dev/null || true)"
  fi
  if [ -f "$COVERAGE_OUTDIR/collector.pid" ]; then
    COLL_PID="$(cat "$COVERAGE_OUTDIR/collector.pid" 2>/dev/null || true)"
  fi

  # No PID file: fall back to the first process that looks like a collector.
  if [ -z "$COLL_PID" ] && have pgrep; then
    COLL_PID="$(pgrep -f 'dotnet-coverage collect' | head -n 1 || true)"
    if [ -n "$COLL_PID" ]; then
      log "[WARN] Collector PID recovered: $COLL_PID"
    fi
  fi

  # Recover APP_PID from the collector's children if it is missing.
  if [ -z "$APP_PID" ] && [ -n "$COLL_PID" ]; then
    APP_PID="$(recover_app_pid_from_collector "$COLL_PID")"
    if [ -n "$APP_PID" ]; then
      log "[WARN] App PID recovered from collector: $APP_PID"
    fi
  fi

  # Read the real --output path from the collector's command line while it is
  # still running; it can differ from the directory this invocation computed.
  REAL_OUT=""
  if [ -n "$COLL_PID" ]; then
    REAL_OUT="$(collector_output_path "$COLL_PID" || true)"
  fi

  # ---- IMPORTANT: stop app first so collector can flush reliably ----
  if [ "$MODE" = "explicit" ]; then
    [ -n "${STOP_CMD:-}" ] || { log "[ERROR] STOP_CMD required for MODE=explicit"; exit 1; }
    log "EXPLICIT: stopping app via STOP_CMD"
    sh -lc "$STOP_CMD" || { log "[ERROR] STOP_CMD failed"; exit 1; }
    sleep 2
  else
    if [ -n "$APP_PID" ] && kill -0 "$APP_PID" 2>/dev/null; then
      log "Stopping app PID=$APP_PID"
      kill -TERM "$APP_PID" 2>/dev/null || true
      if ! wait_pid_exit "$APP_PID" 20; then
        log "[WARN] App still alive; force killing PID=$APP_PID"
        kill -9 "$APP_PID" 2>/dev/null || true
      fi
    else
      log "[WARN] App PID not running or not found"
    fi
  fi

  # Stop the collector after the app: INT first, TERM if it does not exit.
  if [ -n "$COLL_PID" ] && kill -0 "$COLL_PID" 2>/dev/null; then
    log "Stopping collector PID=$COLL_PID"
    kill -INT "$COLL_PID" 2>/dev/null || true
    if ! wait_pid_exit "$COLL_PID" 30; then
      log "[WARN] Collector still alive; sending TERM PID=$COLL_PID"
      kill -TERM "$COLL_PID" 2>/dev/null || true
      wait_pid_exit "$COLL_PID" 20 || true
    fi
  fi

  # Prefer the real output path if one was found.
  EXPECTED="$COVERAGE_OUTDIR/coverage.cobertura.xml"
  OUT_TO_WAIT="$EXPECTED"
  if [ -n "${REAL_OUT:-}" ]; then
    OUT_TO_WAIT="$REAL_OUT"
    log "[WARN] Collector output path detected: $REAL_OUT"
  fi

  log "Waiting for coverage XML..."
  if ! wait_for_file "$OUT_TO_WAIT" 90; then
    log "[ERROR] coverage XML not generated (waited for $OUT_TO_WAIT)"
    log "---- diag: ps dotnet ----"
    ps -ef | grep dotnet | grep -v grep || true
    log "---- app.err (last 200) ----"
    tail -n 200 "$COVERAGE_OUTDIR/logs/app.err" 2>/dev/null || true
    exit 1
  fi

  # If the real output differs from the expected path, copy it so the
  # controller always finds the expected path. A failed copy is tolerated only
  # when the expected file exists anyway (e.g. both names refer to the same
  # file); otherwise success must not be reported.
  if [ "$OUT_TO_WAIT" != "$EXPECTED" ]; then
    mkdir -p "$(dirname "$EXPECTED")"
    if cp -f "$OUT_TO_WAIT" "$EXPECTED" 2>/dev/null; then
      log "[WARN] Copied coverage XML to expected path: $EXPECTED"
    elif [ -f "$EXPECTED" ]; then
      log "[WARN] Could not copy $OUT_TO_WAIT to $EXPECTED; using the file already present at the expected path"
    else
      log "[ERROR] Failed to copy coverage XML from $OUT_TO_WAIT to expected path: $EXPECTED"
      exit 1
    fi
  fi

  log "✅ Coverage XML ready"
  rm -f "$COVERAGE_OUTDIR/app.pid" "$COVERAGE_OUTDIR/collector.pid"
}

# Entry point: the first command-line argument selects the action.
# Anything other than "start" or "stop" prints the usage line and exits 1.
if [ "${1:-}" = "start" ]; then
  start_impl
elif [ "${1:-}" = "stop" ]; then
  stop_impl
else
  echo "Usage: $0 start|stop"
  exit 1
fi

