# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Security-hardened path utilities for Component v2.

This module centralizes path validation and resolution logic used by the
Component v2 registration and file access code paths. All helpers here are
designed to prevent path traversal, insecure prefix checks, and symlink escapes
outside of a declared package root.
"""

from __future__ import annotations

import os
from pathlib import Path
from typing import Final

from streamlit.errors import StreamlitComponentRegistryError
from streamlit.logger import get_logger

_LOGGER: Final = get_logger(__name__)


class ComponentPathUtils:
    """Utility class for component path operations and security validation.

    Every member is a ``@staticmethod``; the class only serves as a namespace
    for the path checks shared by the registration and file-access code.
    """

    @staticmethod
    def has_glob_characters(path: str) -> bool:
        """Check if a path contains glob pattern characters.

        Parameters
        ----------
        path : str
            The path to check

        Returns
        -------
        bool
            True if the path contains any of ``*``, ``?``, ``[`` or ``]``
        """
        return any(char in path for char in ("*", "?", "[", "]"))

    @staticmethod
    def validate_path_security(path: str) -> None:
        """Validate that a path doesn't contain security vulnerabilities.

        Parameters
        ----------
        path : str
            The path to validate

        Raises
        ------
        StreamlitComponentRegistryError
            If the path is absolute, carries a Windows drive prefix, contains a
            null byte, or contains a ``..`` traversal segment
        """
        ComponentPathUtils._assert_relative_no_traversal(path, label="component paths")

    @staticmethod
    def resolve_glob_pattern(pattern: str, package_root: Path) -> Path:
        """Resolve a glob pattern to a single file path with security checks.

        Parameters
        ----------
        pattern : str
            The glob pattern to resolve, relative to ``package_root``
        package_root : Path
            The package root directory for security validation

        Returns
        -------
        Path
            The resolved (symlink-free, absolute) path of the single match

        Raises
        ------
        StreamlitComponentRegistryError
            If the pattern is rejected by the path-safety checks, is not a
            usable glob pattern (e.g. empty), the package root cannot be
            resolved, or if zero or more than one file inside the package
            root matches the pattern
        """
        # Ensure pattern is relative and doesn't contain path traversal attempts
        ComponentPathUtils._assert_relative_no_traversal(pattern, label="glob patterns")

        # Use glob from the package root so subdirectory patterns are handled
        # correctly. pathlib raises ValueError for an empty pattern and
        # NotImplementedError for anchored ones; surface both as registry errors
        # so callers only have to deal with one exception type.
        try:
            matching_files = list(package_root.glob(pattern))
        except (ValueError, NotImplementedError) as e:
            raise StreamlitComponentRegistryError(
                f"Invalid glob pattern '{pattern}': {e}"
            ) from e

        # The resolved root does not depend on the matched file, so compute it
        # once instead of once per match.
        try:
            package_root_resolved = package_root.resolve()
        except (OSError, ValueError) as e:
            raise StreamlitComponentRegistryError(
                f"Failed to resolve package root '{package_root}': {e}"
            ) from e

        # Ensure all matched files are within package_root (security check)
        validated_files: list[Path] = []
        for file_path in matching_files:
            try:
                # Resolving follows symlinks, so a link pointing outside the
                # package root is detected by the containment check below.
                resolved_path = file_path.resolve()
            except (OSError, ValueError) as e:
                _LOGGER.warning("Failed to resolve path %s: %s", file_path, e)
                continue

            # Use pathlib's relative path check to avoid prefix-matching issues
            # (e.g. "/pkg-evil" must not count as being inside "/pkg").
            if not resolved_path.is_relative_to(package_root_resolved):
                _LOGGER.warning("Skipping file outside package root: %s", resolved_path)
                continue

            validated_files.append(resolved_path)

        # Ensure exactly one file matches
        if not validated_files:
            raise StreamlitComponentRegistryError(
                f"No files found matching pattern '{pattern}' in package root {package_root}"
            )
        if len(validated_files) > 1:
            file_list = ", ".join(str(f) for f in validated_files)
            raise StreamlitComponentRegistryError(
                f"Multiple files found matching pattern '{pattern}': {file_list}. "
                "Exactly one file must match the pattern."
            )

        return validated_files[0]

    @staticmethod
    def _assert_relative_no_traversal(path: str, *, label: str) -> None:
        r"""Raise if ``path`` is not a plain relative path without ``..`` segments.

        The checks are string-based and deliberately independent of the host
        platform, so Windows-style inputs are rejected on POSIX and vice versa.

        Parameters
        ----------
        path : str
            Path string to validate.
        label : str
            Human-readable label used in error messages (e.g., "component paths").

        Raises
        ------
        StreamlitComponentRegistryError
            If ``path`` contains a null byte, starts with ``/`` or ``\``
            (POSIX root, Windows rooted path, UNC), starts with a drive prefix
            such as ``C:`` (absolute or drive-relative), is absolute according
            to ``os.path.isabs``, or contains a ``..`` segment.
        """
        # A NUL byte is never part of a valid path and makes later filesystem
        # calls fail with an unrelated ValueError; reject it up front.
        if "\x00" in path:
            raise StreamlitComponentRegistryError(
                f"Null bytes are not allowed in {label}: {path!r}"
            )

        # Any drive prefix is rejected, not only "C:\..." / "C:/...": joining a
        # drive-relative path such as "C:file.js" onto a root on Windows
        # replaces the root's drive and therefore escapes the package root.
        has_drive_prefix = len(path) >= 2 and path[0].isalpha() and path[1] == ":"

        # A leading separator covers POSIX roots, Windows rooted paths and UNC
        # paths. This must not be left to os.path.isabs alone: on Windows with
        # Python 3.13+, isabs("/dir") is False although joining "/dir" onto a
        # root still discards the root's directory part.
        is_rooted = path.startswith(("/", "\\"))

        if os.path.isabs(path) or has_drive_prefix or is_rooted:
            raise StreamlitComponentRegistryError(
                f"Absolute paths are not allowed in {label}: {path}"
            )

        # Segment-based traversal detection to avoid false positives (e.g. "file..js")
        normalized = path.replace("\\", "/")
        if any(seg == ".." for seg in normalized.split("/")):
            raise StreamlitComponentRegistryError(
                f"Path traversal attempts are not allowed in {label}: {path}"
            )

    @staticmethod
    def ensure_within_root(abs_path: Path, root: Path, *, kind: str) -> None:
        """Ensure that abs_path is within root; raise if not.

        Both paths are resolved (following symlinks) before the containment
        check, so a symlink inside ``root`` that points elsewhere is rejected.

        Parameters
        ----------
        abs_path : Path
            Absolute file path
        root : Path
            Root directory path
        kind : str
            Human-readable descriptor for error messages (e.g., "js" or "css")

        Raises
        ------
        StreamlitComponentRegistryError
            If the path cannot be resolved or if the resolved path does not
            reside within ``root`` after following symlinks.
        """
        try:
            resolved = abs_path.resolve()
            root_resolved = root.resolve()
        except Exception as e:
            # Deliberately broad: any resolution failure is reported uniformly
            # as a registry error, with the original exception chained.
            raise StreamlitComponentRegistryError(
                f"Failed to resolve {kind} path '{abs_path}': {e}"
            ) from e

        # Use Path.is_relative_to to avoid insecure prefix-based checks
        if not resolved.is_relative_to(root_resolved):
            raise StreamlitComponentRegistryError(
                f"{kind} path '{abs_path}' is outside the declared asset_dir '{root}'."
            )

    @staticmethod
    def looks_like_inline_content(value: str) -> bool:
        r"""Heuristic to detect inline JS/CSS content strings.

        The value is stripped of surrounding whitespace and then classified:

        - If it contains a newline (``\n`` or ``\r``), it is inline content.
        - Otherwise it is treated as a file path if it contains glob
          characters (``*``, ``?``, ``[``, ``]``), contains a path separator
          (``/`` or ``\``, which includes the ``./``, ``/`` and ``\``
          prefixes), or ends with ``.js``, ``.mjs``, ``.cjs`` or ``.css``
          (case-insensitive).
        - Anything else is inline content.

        Parameters
        ----------
        value : str
            The string to classify as inline content or a file path.

        Returns
        -------
        bool
            True if ``value`` looks like inline content; False if it looks like a
            file path.
        """
        s = value.strip()
        # If the value contains newlines, it's definitely inline content
        if "\n" in s or "\r" in s:
            return True
        # Glob patterns indicate path-like
        if ComponentPathUtils.has_glob_characters(s):
            return False
        # Any path separator marks the value as path-like; this also covers the
        # obvious "./", "/" and "\" prefixes, so no separate prefix test is needed.
        if "/" in s or "\\" in s:
            return False

        # Bare file names are recognised by their asset extension only
        return not s.lower().endswith((".js", ".css", ".mjs", ".cjs"))