<?php
/**
 * CDN Image Rewriter
 *
 * Rewrites image URLs to use the wpimg.io optimization service.
 * Uses a whitelist approach: only files inside specific directories are rewritten to the CDN.
 */

// Refuse direct access: stop immediately unless the ABSPATH constant is defined.
defined('ABSPATH') || exit;

class WP_Image_Optimizer_CDN_Rewriter {

    /**
     * Supported image extensions as a regex alternation (no anchors, no group).
     * Shared by rewrite_url() and process() so both accept the same formats.
     */
    private const SUPPORTED_EXTENSIONS = 'jpe?g|png|webp|gif';

    /**
     * Default directories to include (whitelist).
     * A path qualifies when it starts with one of these prefixes
     * (leading slashes are ignored on both sides).
     */
    private const DEFAULT_INCLUDED = [
        'wp-content/',
        'wp-includes/',
    ];

    /**
     * Default patterns to exclude (blacklist).
     * Supports the "*" wildcard: *.php, *cache*, etc.
     */
    private const DEFAULT_EXCLUDED = [
        '*.php',
        '/wp-content/plugins/*',
        '/wp-content/mu-plugins/*',
        '/wp-content/languages/*',
        '/wp-content/*cache*/*',
    ];

    /** @var string CDN base URL without trailing slash */
    private $cdn_base;
    /** @var string Site host, including ":port" when the site URL has one */
    private $site_host;
    /** @var string Site path without trailing slash ('' for root installs) */
    private $site_path;
    /** @var string Key used to sign CDN URLs */
    private $api_key;
    /** @var string Global cache version embedded in every CDN URL */
    private $cache_version;
    /** @var array Whitelisted directory prefixes */
    private $included;
    /** @var array Blacklisted path patterns */
    private $excluded;

    /**
     * Safe wrapper for preg_replace_callback that prevents a WSOD.
     *
     * preg_replace_callback() returns null on error (e.g. PCRE backtrack limit
     * exceeded); in that case the original subject is returned unchanged.
     *
     * @param string   $pattern  Regex pattern
     * @param callable $callback Replacement callback
     * @param string   $subject  Text to process
     * @return string Processed text, or $subject unchanged on PCRE failure
     */
    private function safe_preg_replace_callback(string $pattern, callable $callback, string $subject): string {
        $result = preg_replace_callback($pattern, $callback, $subject);
        return $result !== null ? $result : $subject;
    }

    /**
     * @param string $cdn_url CDN base URL (provided by the API; supports custom CNAME domains)
     * @param string $api_key Key used to sign CDN URLs
     */
    public function __construct(string $cdn_url, string $api_key = '') {
        $this->cdn_base = rtrim($cdn_url, '/');
        $this->api_key = $api_key;

        $parsed = parse_url(get_site_url());
        $this->site_host = ($parsed['host'] ?? '') . (isset($parsed['port']) ? ':' . $parsed['port'] : '');
        $this->site_path = rtrim($parsed['path'] ?? '', '/');

        // Global cache version (incremented via "Purge Cache" button)
        $this->cache_version = WP_Image_Optimizer_Settings::get_cache_version();

        // Allow filtering included/excluded paths. Cast to array so a filter
        // returning a scalar cannot break the foreach loops below.
        $this->included = (array) apply_filters('wpio_included_directories', self::DEFAULT_INCLUDED);
        $this->excluded = (array) apply_filters('wpio_excluded_paths', self::DEFAULT_EXCLUDED);
    }

    /**
     * Build a signed CDN URL with the cache version for cache busting.
     *
     * Result format: {cdn_base}/{version}.{sig}{site_path}{path}
     *
     * @param string $path Site-relative path starting with "/" (site_path already removed)
     * @return string Signed CDN URL
     */
    private function build_cdn_url(string $path): string {
        $version = $this->cache_version;

        // Full path including site_path for subdirectory installs (e.g., /blog)
        $full_path = $this->site_path . $path;

        // 6-char signature: truncated HMAC-SHA256 over full path + version
        $sig = substr(hash_hmac('sha256', $full_path . $version, $this->api_key), 0, 6);

        return $this->cdn_base . '/' . $version . '.' . $sig . $full_path;
    }

    /**
     * Check if a path is in one of the included directories (whitelist).
     *
     * @param string $path Site-relative path
     * @return bool True when the path starts with a whitelisted prefix
     */
    private function is_path_included(string $path): bool {
        // Leading slashes are irrelevant for the prefix comparison
        $path_normalized = ltrim($path, '/');

        foreach ($this->included as $included_dir) {
            if (!is_string($included_dir)) {
                continue; // Ignore malformed entries injected via the filter
            }
            if (strpos($path_normalized, ltrim($included_dir, '/')) === 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Check if a path matches an exclusion pattern (blacklist).
     *
     * Patterns without "*" must equal the path exactly. In wildcard patterns
     * each "*" matches any run of characters; the pattern is anchored at the
     * start unless it begins with "*", and at the end unless it ends with "*".
     *
     * @param string $path Site-relative path starting with "/"
     * @return bool True when any exclusion pattern matches
     */
    private function is_path_excluded(string $path): bool {
        foreach ($this->excluded as $pattern) {
            if (!is_string($pattern)) {
                continue; // Ignore malformed entries injected via the filter
            }

            // Normalize: add leading slash if pattern doesn't start with * or /
            if (strpos($pattern, '*') !== 0 && strpos($pattern, '/') !== 0) {
                $pattern = '/' . $pattern;
            }

            // Exact match (no wildcards)
            if (strpos($pattern, '*') === false) {
                if ($pattern === $path) {
                    return true;
                }
                continue;
            }

            // Wildcard pattern → regex; a leading/trailing "*" drops that anchor
            $prefix = strpos($pattern, '*') === 0 ? '' : '^';
            $suffix = substr($pattern, -1) === '*' ? '' : '$';

            // preg_quote() turns "*" into "\*"; swap those back for ".*"
            $regex = '#' . $prefix . str_replace('\\*', '.*', preg_quote($pattern, '#')) . $suffix . '#';
            if (preg_match($regex, $path) === 1) {
                return true;
            }
        }
        return false;
    }

    /**
     * Check if a path should be rewritten:
     * it must be in the whitelist AND not match the blacklist.
     *
     * @param string $path Site-relative path starting with "/"
     * @return bool
     */
    private function should_rewrite(string $path): bool {
        return $this->is_path_included($path) && !$this->is_path_excluded($path);
    }

    /**
     * Rewrite a single image URL to its CDN URL.
     *
     * The URL is returned unchanged when it cannot be parsed, belongs to a
     * different host (or port), lies outside the site path on subdirectory
     * installs, does not end in a supported image extension, or fails the
     * whitelist/blacklist check. Query string and fragment are dropped from
     * rewritten URLs.
     *
     * @param string $url The original image URL
     * @return string The CDN URL or original if not applicable
     */
    public function rewrite_url(string $url): string {
        if (empty($url)) {
            return $url;
        }

        $parsed = parse_url($url);
        if (!$parsed || !isset($parsed['path'])) {
            return $url;
        }

        // Only rewrite URLs from our own site (skips external and already-CDN URLs).
        // site_host carries ":port" when present, so the URL host must be built
        // the same way; host names are case-insensitive.
        $url_host = ($parsed['host'] ?? '') . (isset($parsed['port']) ? ':' . $parsed['port'] : '');
        if (strcasecmp($url_host, $this->site_host) !== 0) {
            return $url;
        }

        // Subdirectory installs: strip the site_path prefix. A URL outside the
        // install path is left alone, because build_cdn_url() re-adds site_path
        // and would otherwise point at a different file. The trailing "/" keeps
        // "/blog" from matching "/blogger/...".
        $path = $parsed['path'];
        if ($this->site_path !== '') {
            if (strpos($path, $this->site_path . '/') !== 0) {
                return $url;
            }
            $path = substr($path, strlen($this->site_path));
        }

        // Check extension (must be a supported image format)
        if (!preg_match('/\.(?:' . self::SUPPORTED_EXTENSIONS . ')$/i', $path)) {
            return $url;
        }

        // Check whitelist/blacklist
        if (!$this->should_rewrite($path)) {
            return $url;
        }

        return $this->build_cdn_url($path);
    }

    /**
     * Process HTML and rewrite image URLs to the CDN.
     *
     * Comments and script/textarea/noscript/template/code/pre blocks are
     * swapped for placeholders first so URLs inside them are never touched,
     * then absolute, protocol-relative and relative image URLs are rewritten,
     * and finally the protected blocks are restored.
     *
     * @param string $html Full page output
     * @return string HTML with eligible image URLs rewritten
     */
    public function process(string $html): string {
        if (function_exists('wpio_log')) {
            wpio_log('CDN Rewriter process() called, HTML length: ' . strlen($html));
            wpio_log('CDN base: ' . $this->cdn_base);
            wpio_log('Site host: ' . $this->site_host);
            wpio_log('Site path: ' . $this->site_path);
        }

        if (empty($html)) {
            if (function_exists('wpio_log')) {
                wpio_log('Empty HTML, returning');
            }
            return $html;
        }

        // Skip non-HTML content (XML sitemaps, JSON endpoints, etc.): bail out
        // when an already-queued Content-Type header is not text/html.
        foreach (headers_list() as $header) {
            if (stripos($header, 'Content-Type:') === 0 &&
                stripos($header, 'text/html') === false) {
                if (function_exists('wpio_log')) {
                    wpio_log('Skipping: non-HTML Content-Type detected');
                }
                return $html;
            }
        }

        // Stash a matched block under a NUL-delimited placeholder key.
        // Keys are numbered in creation order.
        $protected = [];
        $stash = function($m) use (&$protected) {
            $key = "\x00WPIO_" . count($protected) . "\x00";
            $protected[$key] = $m[0];
            return $key;
        };

        // 1. Protect HTML comments FIRST (Gutenberg block metadata, conditional comments)
        $html = $this->safe_preg_replace_callback('#<!--[\s\S]*?-->#', $stash, $html);

        // 2. Protect blocks that should never have URLs rewritten:
        //    - script: JavaScript code, JSON data
        //    - textarea: Editor content, user input
        //    - noscript: Fallback content
        //    - template: Inert DOM for JavaScript cloning
        //    - code/pre: Documentation and code examples
        $html = $this->safe_preg_replace_callback(
            '#<(script|textarea|noscript|template|code|pre)\b[^>]*>.*?</\1>#is',
            $stash,
            $html
        );

        $host = preg_quote($this->site_host, '#');
        $path = preg_quote($this->site_path, '#');
        // Match supported image extensions ONLY at natural boundaries.
        // The positive lookahead requires the extension to be followed by one of:
        // ? (query), # (fragment, escaped because # is the regex delimiter),
        // " or ' (attribute quote), > (tag end), whitespace, ) (CSS url()),
        // , (srcset separator), & (HTML entity like &quot;), or end of string.
        // This prevents matching "photo.jpgx" or "photo.jpg/download".
        $ext = '(?:' . self::SUPPORTED_EXTENSIONS . ')(?=[?\#"\'>\s),&]|$)';

        // Shared tail of all three URL patterns: captures the path up to the
        // extension (non-greedy, so names like image(1).jpg work) and consumes
        // an optional query string, which is dropped from the rewritten URL.
        $tail = '(/[^\s"\'<>,]+?\.' . $ext . ')(?:\?[^\s"\'<>)]*)?#i';

        // For absolute/protocol-relative matches $m[1] is the site-relative path.
        $rewrite = function($m) {
            if (!$this->should_rewrite($m[1])) {
                return $m[0]; // Return original URL unchanged
            }
            return $this->build_cdn_url($m[1]);
        };

        // 3. Absolute URLs: https://example.com/wp-content/uploads/img.jpg
        //    or with subdir: https://example.com/blog/wp-content/uploads/img.jpg
        $html = $this->safe_preg_replace_callback('#https?://' . $host . $path . $tail, $rewrite, $html);

        // 4. Protocol-relative: //example.com/wp-content/uploads/img.jpg
        $html = $this->safe_preg_replace_callback('#//' . $host . $path . $tail, $rewrite, $html);

        // 5. Relative URLs: /wp-content/uploads/img.jpg (preceded by delimiter)
        //    Delimiters: " ' ( = , (covers src="...", url(...), srcset="..., ...")
        $html = $this->safe_preg_replace_callback(
            '#(["\'(=,]\s*)' . $path . $tail,
            function($m) {
                // $m[1] is the delimiter, $m[2] is the path (query stripped)
                if (!$this->should_rewrite($m[2])) {
                    return $m[0];
                }
                return $m[1] . $this->build_cdn_url($m[2]);
            },
            $html
        );

        // 6. Restore protected blocks in REVERSE creation order. A block stashed
        //    in step 2 may contain a comment placeholder from step 1, and
        //    str_replace() applies its pairs sequentially, so the outer block
        //    must be expanded before the placeholder nested inside it.
        if (!empty($protected)) {
            $html = str_replace(
                array_reverse(array_keys($protected)),
                array_reverse(array_values($protected)),
                $html
            );
        }

        if (function_exists('wpio_log')) {
            wpio_log('CDN Rewriter finished processing');
        }

        return $html;
    }
}
