#!/bin/bash
# =============================================================
# OTel PII Redaction Demo — Redeployment Script
# =============================================================
# Usage: ./deploy.sh <WORKSPACE_HOST> <CATALOG> <SOURCE_SCHEMA> <TARGET_SCHEMA> <TABLE_PREFIX>
#
# Example:
#   ./deploy.sh https://my-workspace.cloud.databricks.com hanna_moazam claude_ingest traces_redacted claude_code
#
# Prerequisites:
#   - Databricks CLI authenticated to the target workspace
#   - Unity Catalog enabled, AI Functions available
#   - A serverless SQL warehouse

set -euo pipefail

WORKSPACE_HOST="${1:?Usage: ./deploy.sh <WORKSPACE_HOST> <CATALOG> <SOURCE_SCHEMA> <TARGET_SCHEMA> <TABLE_PREFIX>}"
CATALOG="${2:?Missing CATALOG}"
SOURCE_SCHEMA="${3:?Missing SOURCE_SCHEMA}"
TARGET_SCHEMA="${4:?Missing TARGET_SCHEMA}"
TABLE_PREFIX="${5:?Missing TABLE_PREFIX}"

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
WORKSPACE_PATH="/Workspace/Users/$(databricks auth describe --host "$WORKSPACE_HOST" 2>/dev/null | grep -oP 'User:\s+\K.*' || echo 'UNKNOWN_USER')/otel-pii-redaction"

echo "=== OTel PII Redaction Demo Deployment ==="
echo "Workspace:    $WORKSPACE_HOST"
echo "Catalog:      $CATALOG"
echo "Source:       $CATALOG.$SOURCE_SCHEMA"
echo "Target:       $CATALOG.$TARGET_SCHEMA"
echo "Table prefix: $TABLE_PREFIX"
echo "Remote path:  $WORKSPACE_PATH"
echo ""

# Step 1: Upload files to workspace
echo "--- Step 1: Uploading files to workspace ---"
databricks workspace mkdirs "$WORKSPACE_PATH" --host "$WORKSPACE_HOST"
databricks workspace import "$WORKSPACE_PATH/pii_redaction_pipeline.sql" \
  --file "$SCRIPT_DIR/pii_redaction_pipeline.sql" \
  --format AUTO --language SQL --overwrite \
  --host "$WORKSPACE_HOST"
echo "Files uploaded."

# Step 2: Create the target schema
echo ""
echo "--- Step 2: Creating target schema ---"
databricks sql execute --statement "CREATE SCHEMA IF NOT EXISTS ${CATALOG}.${TARGET_SCHEMA}" \
  --host "$WORKSPACE_HOST" 2>/dev/null || echo "Schema may already exist."

# Step 3: Create the SDP pipeline
echo ""
echo "--- Step 3: Creating SDP pipeline ---"
PIPELINE_JSON=$(cat <<EOF
{
  "name": "otel-pii-redaction",
  "catalog": "$CATALOG",
  "schema": "$TARGET_SCHEMA",
  "serverless": true,
  "continuous": false,
  "channel": "CURRENT",
  "configuration": {
    "source_catalog": "$CATALOG",
    "source_schema": "$SOURCE_SCHEMA",
    "table_prefix": "$TABLE_PREFIX",
    "pii_categories": "'email','phone','ssn','credit_card','name','address'"
  },
  "libraries": [
    {"file": {"path": "$WORKSPACE_PATH/pii_redaction_pipeline.sql"}}
  ],
  "tags": {
    "created_by": "otel-pii-redaction-deploy-script"
  }
}
EOF
)
PIPELINE_RESULT=$(echo "$PIPELINE_JSON" | databricks pipelines create --json @- --host "$WORKSPACE_HOST" 2>&1) || true
echo "$PIPELINE_RESULT"
PIPELINE_ID=$(echo "$PIPELINE_RESULT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('pipeline_id','UNKNOWN'))" 2>/dev/null || echo "UNKNOWN")
echo "Pipeline ID: $PIPELINE_ID"

# Step 4: Trigger pipeline run
if [ "$PIPELINE_ID" != "UNKNOWN" ]; then
  echo ""
  echo "--- Step 4: Triggering pipeline run ---"
  databricks pipelines start-update "$PIPELINE_ID" --host "$WORKSPACE_HOST" || echo "Failed to trigger pipeline. Start it manually from the UI."
fi

# Step 5: Configure auto-TTL on raw tables (90-day retention)
echo ""
echo "--- Step 5: Configuring auto-TTL on raw tables ---"
databricks sql execute --statement "ALTER TABLE ${CATALOG}.${SOURCE_SCHEMA}.${TABLE_PREFIX}_otel_spans DELETE ROWS 90 DAYS AFTER time" \
  --host "$WORKSPACE_HOST" 2>/dev/null || echo "Failed to set auto-TTL on spans table."
databricks sql execute --statement "ALTER TABLE ${CATALOG}.${SOURCE_SCHEMA}.${TABLE_PREFIX}_otel_logs DELETE ROWS 90 DAYS AFTER time" \
  --host "$WORKSPACE_HOST" 2>/dev/null || echo "Failed to set auto-TTL on logs table."
echo "Auto-TTL configured. Predictive optimization will handle DELETE, PURGE, and VACUUM automatically."

# Step 6: Create unified view (after pipeline completes)
echo ""
echo "--- Step 6: Note ---"
echo "After the pipeline run completes, create the unified view by running:"
echo "  unified_view.sql (replace \${...} variables with: $CATALOG, $TARGET_SCHEMA, $TABLE_PREFIX)"
echo ""
echo "=== Deployment complete ==="
echo "Pipeline ID: $PIPELINE_ID"
echo "Monitor at: $WORKSPACE_HOST/#joblist/pipelines/$PIPELINE_ID"
