Source code for mcp_server_webcrawl.crawlers.katana.crawler

from pathlib import Path

from mcp_server_webcrawl.crawlers.base.indexed import IndexedCrawler
from mcp_server_webcrawl.crawlers.katana.adapter import get_sites, get_resources
from mcp_server_webcrawl.utils.logger import get_logger

# Module-level logger obtained from the project's logging helper (get_logger).
logger = get_logger()

class KatanaCrawler(IndexedCrawler):
    """
    Crawler for HTTP text file captures.

    Gives access to, and search over, web content taken from captured
    HTTP exchanges. Built on IndexedCrawler, wired to the Katana adapter's
    get_sites and get_resources functions.
    """

    def __init__(self, datasrc: Path) -> None:
        """
        Set up the HTTP text crawler against a data source directory.

        Args:
            datasrc: Path to the data source; it must be a directory
                containing subdirectories with HTTP text files.
        """
        # Hand the data source and the two Katana adapter functions to the
        # base class, in the positional order it is called with here.
        super().__init__(datasrc, get_sites, get_resources)