#!/bin/bash
# shellcheck disable=SC1091,SC2030,SC2031
# ==========================================================
# clpocdns-watch.sh
# ------------------
# [Oracle Cloud DNS監視スクリプト]
# Oracle Cloud DNSゾーンに対象のAレコードが存在するか確認します
# ==========================================================

#
# Debug
#
# When true, WriteStdOut / WriteStdErr prefix their output with a stream tag,
# WriteDebug writes to /dev/tty, and the "rrset update" call gets --debug.
DebugMode=false


#
# Load the shared module
#
# NOTE(review): TrimWhiteSpace and ExtractOCIErrorCause are used below but are
# not defined in this file; presumably they come from clpcloudutil.sh - confirm.
ScriptRoot=$(dirname "$(realpath "$0")")
. "${ScriptRoot}/../common/clpcloudutil.sh"


#
# Environment variables
#
export OCI_CLI_SUPPRESS_FILE_PERMISSIONS_WARNING="True"


#
# Script exit codes
#
# Warning codes for OCI CLI failures are the matching error code + WarnOffset.
Success=0                                           # [Normal] (0)
ErrorOciFailed=50                                   # [Error] OCI CLI failed (50)
ErrorOciTimeout=51                                  # [Error] OCI CLI timed out (51)
ErrorNotExistRecord=52                              # [Error] Record set does not exist (52)
ErrorNameResolution=53                              # [Error] Name resolution failed (53)
ErrorUnmatchIPAddress=54                            # [Error] IP address obtained by name resolution is wrong (54)
WarnOffset=100
WarnOciFailed=$((ErrorOciFailed + WarnOffset))      # [Warning] OCI CLI failed (150)
WarnOciTimeout=$((ErrorOciTimeout + WarnOffset))    # [Warning] OCI CLI timed out (151)
WarnGetClpConfValue=155                             # [Warning] Failed to get a value from the cluster configuration (155)
WarnNotExistOCICmd=156                              # [Warning] OCI CLI is not installed (156)
WarnNotExistHostCmd=157                             # [Warning] host command is not installed (157)
WarnInternalError=179                               # [Warning] Internal error (179)


#
# Alert output strings
#
# These are passed to SetAlertMessage. "%1" in AlertOciFailed is replaced
# with the extracted error cause before the message is stored.
AlertOciFailed="The OCI CLI command failed. (%1)"
AlertOciTimeout="Timeout occurred."
AlertNotExistRecord="The resource record set in Oracle Cloud DNS does not exist."
AlertNameResolution="Failed to resolve domain name."
AlertUnmatchIPAddress="IP address which is resolved domain name from the DNS resolver is different from the setting."
AlertGetClpConfValue="Failed to obtain the setting value."
AlertNotExistOCICmd="The OCI CLI command is not found."
AlertNotExistHostCmd="The host command is not found. Monitoring based on name resolution will be skipped."
AlertInternalError="Internal error occurred."


#
# Cause text used when the OCI CLI error cause cannot be determined
#
ErrorCauseUnknown="Internal error"


#
# Number of CLP_OCF_PARAM<n> environment variables set by the parent module
#
ClpOcfParamCnt=6


#
# Exit code of an external command that finished successfully
#
ExCmdSuccess=0


#
# Maximum size of the shared-memory area used for alert log output
#
ShmMaxSize=$((128 - 1))


#
# Resource types
#
RscType="ocdns"
MonType="ocdnsw"


#
# Scope of resource record operations in a multi-region configuration
#
# OperationOnlyActiveServerRegion=0       # Only the region of the active server
OperationAllServerRegion=1              # All regions to which the cluster servers belong


#
# Action when the OCI CLI command fails to return a response
#
NoRecoveryNoWarn=0      # Do not execute the recovery action (no warning)
NoRecoveryWarn=1        # Do not execute the recovery action (show a warning)
Recovery=2              # Execute the recovery action


#
# Exit code when a timeout occurs (128 + SIGNAL)
#
# 9 matches the SIGKILL that the 'timeout -s SIGKILL' calls below send.
ExitCodeWithTimeout=$((128 + 9))


# ----------------------------------------------------------
#
# 関数定義
#
# ----------------------------------------------------------

#
# Configure proxy information for the OCI CLI
#
# Clears HTTPS_PROXY, reads the proxy scheme of the active server from the
# cluster configuration and, when the scheme equals 1 (HTTP proxy), exports
# HTTPS_PROXY as "http://<server>:<port>".
# Globals read:    actServerName
# Globals written: HTTPS_PROXY, useProxyHTTP, proxyXmlPath, proxyScheme,
#                  proxyServer, proxyPort, proxy
# Returns: non-zero as soon as one configuration lookup fails.
#
function SetProxy {
    unset HTTPS_PROXY
    useProxyHTTP=1
    proxyXmlPath="/root/server@${actServerName}/proxy"

    proxyScheme=$(GetClpConfValue "-g" "${proxyXmlPath}/scheme" "" false) || return
    WriteStdOut "proxyScheme: $proxyScheme"

    # Any scheme other than "HTTP proxy" leaves HTTPS_PROXY unset.
    [[ $proxyScheme -eq $useProxyHTTP ]] || return 0

    proxyServer=$(GetClpConfValue "-g" "${proxyXmlPath}/server" "" false) || return
    proxyPort=$(GetClpConfValue "-g" "${proxyXmlPath}/port" "" false) || return
    proxy="http://${proxyServer}:${proxyPort}"
    export HTTPS_PROXY=$proxy
    WriteStdOut "proxy: $HTTPS_PROXY"
}


#
# Store the alert log message in shared memory
#
# Arguments:
#   $1 - alert message text
# Globals read: MonType, monName, ExCmdSuccess
# The command is run from an argument array (no eval), so a message that
# contains quotes, '$' or other shell metacharacters is passed verbatim.
# A failure of 'clpshmrmset' is only logged; processing continues.
#
function SetAlertMessage {
    local alertMessage=$1
    local -a shmRmCmd=(clpshmrmset --descript --mon -t "$MonType" -n "$monName" -m "$alertMessage")
    local result
    local shmRmSetExitCode

    result=$("${shmRmCmd[@]}" 2>&1)
    shmRmSetExitCode=$?
    if [[ $shmRmSetExitCode -ne $ExCmdSuccess ]]; then
        # Human-readable rendering of the command line, used for logging only
        WriteStdOut "[CommandLine] clpshmrmset --descript --mon -t $MonType -n '$monName' -m '$alertMessage'"
        WriteStdErr "The 'clpshmrmset' command failed. ($shmRmSetExitCode)"
        WriteStdErr "$result"
        # Continue processing
    fi
}


#
# Get a value from the cluster configuration (clp.conf)
#
# Arguments:
#   $1 - clpcfget option type (the callers in this file pass "-g" or "-e")
#   $2 - XML path of the value
#   $3 - server name; when non-empty, "-s <server>" is appended
#   $4 - "true" (default) appends "-p $RscType"; "false" omits it
# Outputs: the clpcfget output on stdout when it succeeds
# Returns: 0 on success, 1 when clpcfget fails (its output goes to stderr)
# The command is run from an argument array (no eval), so paths and server
# names are passed as single, uninterpreted arguments.
#
function GetClpConfValue {
    local optType=$1
    local xmlPath=$2
    local server=$3
    local fromOcDnsPolicy=${4:-true}
    local -a cfGetCmd=(clpcfget "$optType" "$xmlPath")
    local cfGetCmdLine
    local result
    local cfGetExitCode

    if [[ $fromOcDnsPolicy == true ]]; then
        cfGetCmd+=(-p "$RscType")
    fi
    if [[ -n $server ]]; then
        cfGetCmd+=(-s "$server")
    fi

    # Flattened command line, used for logging only
    cfGetCmdLine="${cfGetCmd[*]}"
    WriteDebug "cfGetCmdLine: $cfGetCmdLine"

    # stderr is merged so that the failure log contains the tool's own message
    result=$("${cfGetCmd[@]}" 2>&1)
    cfGetExitCode=$?
    if [[ $cfGetExitCode -ne $ExCmdSuccess ]]; then
        WriteStdErr "The 'clpcfget' command failed. ($cfGetExitCode: [CommandLine] $cfGetCmdLine)"
        WriteStdErr "$result"
        return 1
    fi
    printf '%s\n' "$result"
}


#
# Pick the exit code that matches the configured
# [Action when the OCI CLI command fails to return a response]
#
# Arguments:
#   $1 - exit code to use when the recovery action is to be executed (error)
#   $2 - exit code to use when only a warning is to be shown
# Globals read: recoveryActOnCliErr, NoRecoveryNoWarn, NoRecoveryWarn,
#               Recovery, Success
# Outputs: the selected exit code on stdout
#
function ShiftStatusOnCliError {
    # (Note:)
    #    With [Do not execute the recovery action (no warning)], "normal" is
    #    returned to the parent module, so no alert is raised in any case,
    #    including cases such as insufficient privileges to run the CLI.
    exitError=$1
    exitWarn=$2
    if [[ $recoveryActOnCliErr == "$NoRecoveryNoWarn" ]]; then
        exitCode=$Success
        status="Normal"
    elif [[ $recoveryActOnCliErr == "$NoRecoveryWarn" ]]; then
        exitCode=$exitWarn
        status="Warn"
    elif [[ $recoveryActOnCliErr == "$Recovery" ]]; then
        exitCode=$exitError
        status="Error"
    else
        # An unrecognized setting is treated as an error
        WriteStdErr "Unknown recoveryActOnCliErr: '$recoveryActOnCliErr'"
        exitCode=$exitError
        status="Error"
    fi
    WriteDebug "Shift status: $status"
    echo "$exitCode"
}


#
# Write a string to standard output
#
# Arguments:
#   $1 - message
# Globals read: DebugMode (when true the line is prefixed with "[STDOUT] ")
# printf is used instead of echo so that a message such as "-n" or "-e"
# is printed as-is rather than being consumed as an echo option.
#
function WriteStdOut {
    local message=$1
    if ! $DebugMode; then
        # printf "%04d: %s\n" "${BASH_LINENO[0]}" "$message"
        printf '%s\n' "$message"
    else
        # printf "[STDOUT] %04d: %s\n" "${BASH_LINENO[0]}" "$message"
        printf '%s\n' "[STDOUT] $message"
    fi
}


#
# Write a string to standard error
#
# Arguments:
#   $1 - message
# Globals read: DebugMode (when true the line is prefixed with "[STDERR] ")
# printf is used instead of echo so that a message such as "-n" or "-e"
# is printed as-is rather than being consumed as an echo option.
#
function WriteStdErr {
    local message=$1
    if ! $DebugMode; then
        # printf "%04d: %s\n" "${BASH_LINENO[0]}" "$message" >&2
        printf '%s\n' "$message" >&2
    else
        # printf "[STDERR] %04d: %s\n" "${BASH_LINENO[0]}" "$message" >&2
        printf '%s\n' "[STDERR] $message" >&2
    fi
}


#
# Write a string to the console (debugging aid)
#
# Arguments:
#   $1 - message
# Globals read: DebugMode
# Nothing is emitted unless DebugMode is true; in that case the line goes
# to /dev/tty, not to stdout/stderr.
#
function WriteDebug {
    local message=$1
    if $DebugMode; then
        # printf "[ DEBUG] %04d: %s\n" "${BASH_LINENO[0]}" "$message" >/dev/tty
        printf '%s\n' "[ DEBUG] $message" >/dev/tty
    fi
}


#
# Check whether the argument is an integer greater than or equal to 0
#
# Arguments:
#   $1 - value to check
# Returns: 0 when the value consists only of decimal digits and passes the
#          numeric ">= 0" comparison, 1 otherwise.
# NOTE(review): the numeric comparison probably rejects digit strings that
# bash cannot read as a number (e.g. a leading-zero "08") - confirm whether
# that is intended.
#
function IsNonNegativeNumber {
    number=$1
    [[ $number =~ ^[0-9]+$ && $number -ge 0 ]] && return 0
    return 1
}


# ----------------------------------------------------------
#
# Start of processing
#
# ----------------------------------------------------------
WriteDebug "Start the monitoring process for Oracle Cloud DNS monitoring resource."

#
# Check the CLP_* environment variables set by the parent module
#
if [[ -z $CLP_RESOURCENAME ]]; then
    WriteStdErr "The environment variable 'CLP_RESOURCENAME' has not been set."
    SetAlertMessage "$AlertInternalError"
    exit $WarnInternalError
fi
# Short host name (everything before the first '.')
actServerName=$(uname -n | cut -d '.' -f 1)
monName=$CLP_RESOURCENAME
WriteDebug "actServerName: $actServerName"
WriteStdOut "monName: $monName"

# Every CLP_OCF_PARAM1..CLP_OCF_PARAM<ClpOcfParamCnt> must be non-empty
for ((i = 1; i <= ClpOcfParamCnt; i++)); do
    CLP_OCF_PARAM_VAR="CLP_OCF_PARAM${i}"
    if [[ -z ${!CLP_OCF_PARAM_VAR} ]]; then
        WriteStdErr "The environment variable '$CLP_OCF_PARAM_VAR' has not been set."
        SetAlertMessage "$AlertInternalError"
        exit $WarnInternalError
    fi
done
rscName=$CLP_OCF_PARAM1
recoveryActOnCliErr=$CLP_OCF_PARAM2
nameResolution=$CLP_OCF_PARAM3
monTimeoutSec=$CLP_OCF_PARAM4
monTimeoutMargin=$CLP_OCF_PARAM5
execScriptMargin=$CLP_OCF_PARAM6

# All parameters except the resource name must be non-negative integers;
# $i holds the variable NAME here and ${!i} its value.
for i in "recoveryActOnCliErr" "nameResolution" \
         "monTimeoutSec" "monTimeoutMargin" "execScriptMargin"; do
    if ! IsNonNegativeNumber "${!i}"; then
        WriteStdErr "'$i' is not a non-negative integer. (${!i})"
        SetAlertMessage "$AlertInternalError"
        exit $WarnInternalError
    fi
done
WriteStdOut "rscName: $rscName"
WriteStdOut "recoveryActOnCliErr: $recoveryActOnCliErr"
WriteStdOut "nameResolution: $nameResolution"
WriteStdOut "monTimeoutSec: $monTimeoutSec"
WriteDebug "monTimeoutMargin: $monTimeoutMargin"
WriteDebug "execScriptMargin: $execScriptMargin"


#
# Make sure the OCI CLI is installed (command existence check)
#
# Without the 'oci' command nothing can be monitored, so the script ends
# with the dedicated warning code after storing the alert message.
command -v oci >/dev/null || {
    WriteStdErr "The OCI CLI command was not found."
    SetAlertMessage "$AlertNotExistOCICmd"
    exit $WarnNotExistOCICmd
}


#
# Read the Oracle Cloud DNS resource settings
#
# Every lookup that fails stores the same alert message and ends the script
# with WarnGetClpConfValue. Region, zone OCID and IP address are looked up
# with the active server name; the remaining values are looked up without one.
targetBaseXmlPath="/root/resource/$RscType@$rscName"

actServerRegionXmlPath="$targetBaseXmlPath/parameters/region"
actServerRegion=$(GetClpConfValue "-g" "$actServerRegionXmlPath" "$actServerName") || {
    SetAlertMessage "$AlertGetClpConfValue"
    exit $WarnGetClpConfValue
}

domainFqdnXmlPath="$targetBaseXmlPath/parameters/domain"
domainFqdn=$(GetClpConfValue "-g" "$domainFqdnXmlPath") || {
    SetAlertMessage "$AlertGetClpConfValue"
    exit $WarnGetClpConfValue
}

actServerZoneOcidXmlPath="$targetBaseXmlPath/parameters/zoneid"
actServerZoneOcid=$(GetClpConfValue "-g" "$actServerZoneOcidXmlPath" "$actServerName") || {
    SetAlertMessage "$AlertGetClpConfValue"
    exit $WarnGetClpConfValue
}

actServerIpAddressXmlPath="$targetBaseXmlPath/parameters/ip"
actServerIpAddress=$(GetClpConfValue "-g" "$actServerIpAddressXmlPath" "$actServerName") || {
    SetAlertMessage "$AlertGetClpConfValue"
    exit $WarnGetClpConfValue
}

ttlSecXmlPath="$targetBaseXmlPath/parameters/ttl"
ttlSec=$(GetClpConfValue "-g" "$ttlSecXmlPath") || {
    SetAlertMessage "$AlertGetClpConfValue"
    exit $WarnGetClpConfValue
}

useProxySettingXmlPath="$targetBaseXmlPath/parameters/proxy/use"
useProxySetting=$(GetClpConfValue "-g" "$useProxySettingXmlPath") || {
    SetAlertMessage "$AlertGetClpConfValue"
    exit $WarnGetClpConfValue
}

operationScopeXmlPath="$targetBaseXmlPath/parameters/operationscope"
operationScope=$(GetClpConfValue "-g" "$operationScopeXmlPath") || {
    SetAlertMessage "$AlertGetClpConfValue"
    exit $WarnGetClpConfValue
}

WriteDebug "actServerRegion: $actServerRegion"
WriteDebug "domainFqdn: $domainFqdn"
WriteDebug "actServerZoneOcid: $actServerZoneOcid"
WriteDebug "actServerIpAddress: $actServerIpAddress"
WriteStdOut "ttlSec: $ttlSec"
WriteStdOut "useProxySetting: $useProxySetting"
WriteStdOut "operationScope: $operationScope"


#
# Configure proxy information
#
if [[ $useProxySetting -eq 1 ]]; then
    SetProxy
fi


#
# Obtain the timeout ratio
#
# Falls back to defaultToRatio when 'clptoratio' fails or when the value
# extracted from its output is not a plain integer.
defaultToRatio=1
toRatioResult=$(clptoratio -s 2>&1)
toRatioExitCode=$?
if [[ $toRatioExitCode -eq $ExCmdSuccess ]]; then
    # Extract only <ratio> from the output (present toratio : <ratio>)
    toRatio=$(echo "$toRatioResult" | awk -F':' '{print $2}' | TrimWhiteSpace)
    if [[ ! $toRatio =~ ^[0-9]+$ ]]; then
        WriteStdErr "toRatio is not a number. ($toRatio)"
        WriteStdOut "Use the default timeout ratio. ($defaultToRatio)"
        toRatio=$defaultToRatio
    fi
else
    WriteStdErr "The 'clptoratio' command failed. ($toRatioExitCode)"
    WriteStdErr "$toRatioResult"
    WriteStdOut "Use the default timeout ratio. ($defaultToRatio)"
    toRatio=$defaultToRatio
fi


#
# Compute the OCI CLI timeout
#
# Monitor timeout scaled by the ratio, minus both margins, but never less
# than lowerLimitSec seconds.
lowerLimitSec=3
ociTimeoutSec=$((monTimeoutSec * toRatio - monTimeoutMargin - execScriptMargin))
if [[ $ociTimeoutSec -lt $lowerLimitSec ]]; then
    ociTimeoutSec=$lowerLimitSec
fi
WriteStdOut "ociTimeoutSec: $ociTimeoutSec (ratio:$toRatio)"


#
# Build the OCI CLI command line
#
# The --query expression compares the rdata of the first returned item with
# the configured IP address. The string is later run through eval, which is
# why the values are wrapped in escaped double quotes.
ociCmdLine="oci dns record rrset get"
ociCmdLine+=" --domain \"$domainFqdn\""
ociCmdLine+=" --zone-name-or-id \"$actServerZoneOcid\""
ociCmdLine+=" --rtype A"
ociCmdLine+=" --query \"data.items[0].rdata=='$actServerIpAddress'\""
ociCmdLine+=" --region \"$actServerRegion\""
WriteStdOut "[CommandLine] $ociCmdLine"


#
# Run the OCI CLI
#
# Capture stdout, stderr and the exit status produced inside the subshell.
# The inner eval runs the command under 'timeout'; its stderr and stdout are
# each read completely by a process substitution that then prints a
# 'declare -p' statement for the captured text, and the exit status is
# printed the same way. The outer eval executes those statements, which
# defines ociResultErr, ociResult and ociExitCode in this shell.
eval "$(eval "timeout -s SIGKILL $ociTimeoutSec $ociCmdLine" \
            2> >(ociResultErr=$(cat); declare -p ociResultErr) \
            1> >(ociResult=$(cat); declare -p ociResult); \
            ociExitCode=$?; declare -p ociExitCode )"
if [[ $ociExitCode -eq $ExitCodeWithTimeout ]]; then
    # OCI CLI timed out
    WriteStdErr "The OCI CLI command timed out."
    exitCode=$(ShiftStatusOnCliError $ErrorOciTimeout $WarnOciTimeout)
    # No alert when the configured action maps the failure to "normal"
    if [[ $exitCode -ne $Success ]]; then
        SetAlertMessage "$AlertOciTimeout"
    fi
    exit "$exitCode"
fi
if [[ $ociExitCode -ne $ExCmdSuccess ]]; then
    # OCI CLI ended abnormally
    WriteStdErr "The OCI CLI command failed. ($ociExitCode)"
    ociResultErr=$(echo "$ociResultErr" | TrimWhiteSpace)
    WriteStdErr "$ociResultErr"
    exitCode=$(ShiftStatusOnCliError $ErrorOciFailed $WarnOciFailed)
    if [[ $exitCode -ne $Success ]]; then
        # Fall back to the generic cause text when no cause can be extracted
        if ! cause=$(ExtractOCIErrorCause "$ociResultErr" "$ShmMaxSize"); then
            cause=$ErrorCauseUnknown
        fi
        # Replace the "%1" placeholder; '%' is escaped so that it is not
        # taken as the "match at end" anchor of the pattern substitution
        SetAlertMessage "${AlertOciFailed/\%1/$cause}"
    fi
    exit "$exitCode"
fi
# The query result is the literal text "true" when the record matches
existARecordInDnsZone=$ociResult
if [[ $existARecordInDnsZone != "true" ]]; then
    WriteStdErr "The target A record was NOT found in the DNS Zone. region: $actServerRegion"
    SetAlertMessage "$AlertNotExistRecord"
    exit $ErrorNotExistRecord
fi
WriteStdOut "The target A record was found in the DNS Zone. region: $actServerRegion"


#
# Verify name resolution of the domain name
#
# Runs only when nameResolution is 1. The last field of the 'host -t A'
# output is compared with the configured IP address.
if [[ $nameResolution -eq 1 ]]; then
    command -v host >/dev/null || {
        WriteStdErr "The host command is not found."
        SetAlertMessage "$AlertNotExistHostCmd"
        exit $WarnNotExistHostCmd
    }

    result=$(host -t A "$domainFqdn") || {
        WriteStdErr "$result"
        SetAlertMessage "$AlertNameResolution"
        exit $ErrorNameResolution
    }

    resolved=$(echo "$result" | awk '{print $NF}')
    WriteDebug "resolved: $resolved"

    # The resolved IP address must equal the configured IP address
    if [[ $resolved == "$actServerIpAddress" ]]; then
        WriteStdOut "Name resolution confirmation was successful."
    else
        WriteStdErr "The resolved IP address ($resolved) does not match the configured IP address ($actServerIpAddress)."
        SetAlertMessage "$AlertUnmatchIPAddress"
        exit $ErrorUnmatchIPAddress
    fi
fi


#
# When [All regions to which the cluster servers belong] is configured,
# also check that the A record exists in the DNS of the regions to which
# the standby servers belong.
# If the record is not found there, an attempt is made to create it.
# * Even when the creation fails, the monitor status stays normal.
#
declare -A otherServers
# region -> names of the servers configured with that region; used only so
# that the "Skip processing" log messages name the server(s) of the region
# being processed instead of the last server visited while collecting
declare -A otherRegionServerNames
if [[ $operationScope -eq $OperationAllServerRegion ]]; then
    # Collect the region / zone OCID configured for each server
    allServersXmlPath="/root/server"
    rscBaseXmlPath="/root/resource/${RscType}@${rscName}"
    allServers=$(GetClpConfValue "-e" "$allServersXmlPath" "" false) && {
        for server in $allServers; do
            regionXmlPath="$rscBaseXmlPath/parameters/region"
            region=$(GetClpConfValue "-g" "$regionXmlPath" "$server" false) || continue
            zoneOcidXmlPath="$rscBaseXmlPath/parameters/zoneid"
            zoneOcid=$(GetClpConfValue "-g" "$zoneOcidXmlPath" "$server" false) || continue
            otherServers["$region"]=$zoneOcid
            otherRegionServerNames["$region"]+="${otherRegionServerNames[$region]:+, }$server"
        done

        # The DNS of the active server's region has already been checked above, so exclude it
        unset "otherServers[$actServerRegion]"

        for otherRegion in "${!otherServers[@]}"; do
            otherZoneOcid="${otherServers[$otherRegion]}"
            otherServerNames="${otherRegionServerNames[$otherRegion]}"
            WriteDebug "otherRegion: $otherRegion"
            WriteDebug "otherZoneOcid: $otherZoneOcid"

            #
            # Build the OCI CLI command line
            #
            ociCmdLine="oci dns record rrset get"
            ociCmdLine+=" --domain \"$domainFqdn\""
            ociCmdLine+=" --zone-name-or-id \"$otherZoneOcid\""
            ociCmdLine+=" --rtype A"
            ociCmdLine+=" --query \"data.items[0].rdata=='$actServerIpAddress'\""
            ociCmdLine+=" --region \"$otherRegion\""
            WriteStdOut "[CommandLine] $ociCmdLine"

            # Run under 'timeout'; the outer eval imports ociResultErr,
            # ociResult and ociExitCode printed via 'declare -p'
            eval "$(eval "timeout -s SIGKILL $ociTimeoutSec $ociCmdLine" \
                        2> >(ociResultErr=$(cat); declare -p ociResultErr) \
                        1> >(ociResult=$(cat); declare -p ociResult); \
                        ociExitCode=$?; declare -p ociExitCode )"
            if [[ $ociExitCode -eq $ExitCodeWithTimeout ]]; then
                # OCI CLI timed out
                WriteStdErr "The OCI CLI command timed out."
                WriteStdErr "Skip processing for DNS of the region to which Server $otherServerNames belongs."
                continue
            fi
            if [[ $ociExitCode -ne $ExCmdSuccess ]]; then
                # OCI CLI ended abnormally
                WriteStdErr "The OCI CLI command failed. ($ociExitCode)"
                ociResultErr=$(echo "$ociResultErr" | TrimWhiteSpace)
                WriteStdErr "$ociResultErr"
                WriteStdErr "Skip processing for DNS of the region to which Server $otherServerNames belongs."
                continue
            fi
            existARecordInDnsZone=$ociResult

            # ------------------------------------------------
            #
            # Create the record when the target record does not exist
            #
            # ------------------------------------------------
            if [[ $existARecordInDnsZone != "true" ]]; then
                WriteStdErr "Attempt to create an A record. region: $otherRegion"

                #
                # Create '<install directory>/work/ocdns'
                #
                umask 033
                installDir=$(cd "$(dirname "$0")/../.." && pwd)
                workDir="${installDir}/work/${RscType}"
                mkdir -p "$workDir"


                #
                # Create the JSON file passed to the OCI CLI option
                # '<install directory>/work/ocdns/<resource name>.json'
                #
                jsonFileName="${rscName}.json"
                jsonFilePath="${workDir}/${jsonFileName}"
                cat << EOF > "$jsonFilePath"
[
    {
        "domain": "$domainFqdn",
        "rdata": "$actServerIpAddress",
        "rtype": "A",
        "ttl": "$ttlSec"
    }
]
EOF
                #
                # Build the OCI CLI command line
                #
                ociCmdLine="oci dns record rrset update"
                ociCmdLine+=" --domain \"$domainFqdn\""
                ociCmdLine+=" --zone-name-or-id \"$otherZoneOcid\""
                ociCmdLine+=" --rtype A"
                ociCmdLine+=" --items \"file://$jsonFilePath\""
                ociCmdLine+=" --region \"$otherRegion\""
                ociCmdLine+=" --force"
                if $DebugMode; then
                    ociCmdLine+=" --debug"
                fi
                WriteStdOut "[CommandLine] $ociCmdLine"

                eval "$(eval "timeout -s SIGKILL $ociTimeoutSec $ociCmdLine" \
                            2> >(ociResultErr=$(cat); declare -p ociResultErr) \
                            1> >(ociResult=$(cat); declare -p ociResult); \
                            ociExitCode=$?; declare -p ociExitCode )"
                if [[ $ociExitCode -eq $ExitCodeWithTimeout ]]; then
                    # OCI CLI timed out
                    WriteStdErr "The OCI CLI command timed out."
                    WriteStdErr "Skip processing for DNS of the region to which Server $otherServerNames belongs."
                    rm -f "$jsonFilePath"
                    continue
                fi
                if [[ $ociExitCode -ne $ExCmdSuccess ]]; then
                    # OCI CLI ended abnormally
                    WriteStdErr "The OCI CLI command failed. ($ociExitCode)"
                    ociResultErr=$(echo "$ociResultErr" | TrimWhiteSpace)
                    WriteStdErr "$ociResultErr"
                    WriteStdErr "Skip processing for DNS of the region to which Server $otherServerNames belongs."
                    rm -f "$jsonFilePath"
                    continue
                fi
                WriteDebug "$ociResult"
                WriteStdOut "Created an A record in the DNS Zone. region: $otherRegion"
                rm -f "$jsonFilePath"
            else
                WriteStdOut "The target A record was found in the DNS Zone. region: $otherRegion"
            fi
        done
    }
fi


# ----------------------------------------------------------
#
# End of processing (success)
#
# ----------------------------------------------------------
# Reached only when none of the checks above ended the script early.
WriteStdOut "Succeeded in monitoring for Oracle Cloud DNS monitoring resource."
exit $Success
