Source code for mcp_server_webcrawl.models.sites

from datetime import datetime
from typing import Final
from pathlib import Path

from mcp_server_webcrawl.models import METADATA_VALUE_TYPE
from mcp_server_webcrawl.utils import to_isoformat_zulu

# Name under which the sites tool is registered.
SITES_TOOL_NAME: Final[str] = "webcrawl_sites"

# Fields that every site result carries.
SITES_FIELDS_REQUIRED: Final[list[str]] = [
    "id",
    "url",
]

# Required fields plus the timestamps; built as a separate list so the
# two constants never share the same list object.
SITES_FIELDS_DEFAULT: Final[list[str]] = [
    *SITES_FIELDS_REQUIRED,
    "created",
    "modified",
]


[docs]
class SiteResult:
    """
    Represents a website or crawl directory result.

    Attributes:
        id: site identifier
        url: site URL
        path: path to site data, different from datasrc
        created: creation timestamp
        modified: last modification timestamp
        robots: robots.txt content
        metadata: additional metadata for the site; never None, a missing
            or empty value is stored as a new empty dict
    """

    def __init__(
        self,
        id: int,
        url: str | None = None,
        path: Path | None = None,
        created: datetime | None = None,
        modified: datetime | None = None,
        robots: str | None = None,
        metadata: dict[str, METADATA_VALUE_TYPE] | None = None
    ):
        """
        Initialize a SiteResult instance.

        Args:
            id: site identifier
            url: site URL
            path: path to site data, different from datasrc
            created: creation timestamp
            modified: last modification timestamp
            robots: robots.txt content
            metadata: additional metadata for the site
        """
        self.id = id
        self.url = url
        self.path = path
        self.created = created
        self.modified = modified
        self.robots = robots
        # normalize None (or any empty value) to a fresh dict so callers
        # can always treat metadata as a mapping
        self.metadata = metadata or {}

    def to_dict(self) -> dict[str, METADATA_VALUE_TYPE]:
        """
        Convert the object to a dictionary suitable for JSON serialization.

        Timestamps are passed through to_isoformat_zulu. Keys whose value is
        None are left out, and so is empty metadata. The path attribute is
        not part of the output.

        Returns:
            Dictionary holding the non-None values among id, url, created,
            modified, robots, and metadata
        """
        result: dict[str, METADATA_VALUE_TYPE] = {
            "id": self.id,
            "url": self.url,
            "created": to_isoformat_zulu(self.created) if self.created else None,
            "modified": to_isoformat_zulu(self.modified) if self.modified else None,
            "robots": self.robots,
            # empty metadata collapses to None so the filter below drops it
            "metadata": self.metadata if self.metadata else None,
        }

        return {k: v for k, v in result.items() if v is not None}

    def to_forcefield_dict(self, forcefields: list[str] | None) -> dict[str, METADATA_VALUE_TYPE]:
        """
        Convert the object to a dictionary with specified fields forced to exist.

        Starts from the to_dict() output, which omits None values, and
        guarantees that every name in forcefields is present as a key,
        using None when to_dict() supplied no value for it.

        Args:
            forcefields: list of field names that must appear in the output
                dictionary with at least a None value; None or an empty
                list forces nothing

        Returns:
            Dictionary containing everything to_dict() returns, plus forced
            fields set to None if not already present
        """
        # None self-annihilates in to_dict's filter; seeding the forced keys
        # first lets them survive as null, and update() then overwrites any
        # that do have a real value
        result: dict[str, METADATA_VALUE_TYPE] = dict.fromkeys(forcefields or ())
        result.update(self.to_dict())
        return result