Source code for mcp_server_webcrawl.crawlers.interrobot.tests

import asyncio
from logging import Logger
from mcp.types import EmbeddedResource, ImageContent, TextContent
from mcp_server_webcrawl.crawlers.base.tests import BaseCrawlerTests
from mcp_server_webcrawl.crawlers.interrobot.crawler import InterroBotCrawler
from mcp_server_webcrawl.models.resources import RESOURCES_TOOL_NAME
from mcp_server_webcrawl.crawlers import get_fixture_directory
from mcp_server_webcrawl.utils.logger import get_logger

# Site IDs that belong to the InterroBot database fixture; they are static
# for testing and are passed to the shared test helpers as-is.
EXAMPLE_SITE_ID = 1
PRAGMAR_SITE_ID = 2

# Module-level logger obtained from the project's get_logger() helper.
logger: Logger = get_logger()

class InterroBotTests(BaseCrawlerTests):
    """
    Test suite for the InterroBot crawler implementation.
    Uses all wrapped test methods from BaseCrawlerTests plus InterroBot-specific features.
    """

    def setUp(self):
        """
        Set up the test environment with fixture data.
        """
        super().setUp()
        self.fixture_path = get_fixture_directory() / "interrobot" / "interrobot.v2.db"

    def test_interrobot_pulse(self):
        """
        Test basic crawler initialization.
        """
        crawler = InterroBotCrawler(self.fixture_path)
        self.assertIsNotNone(crawler)

    def test_interrobot_sites(self):
        """
        Test site retrieval API functionality.
        """
        crawler = InterroBotCrawler(self.fixture_path)
        # Note: InterroBot uses site ID 2 for pragmar instead of calculating from string
        self.run_pragmar_site_tests(crawler, PRAGMAR_SITE_ID)

    def test_interrobot_resources(self):
        """
        Test resource retrieval API functionality with various parameters.
        """
        crawler = InterroBotCrawler(self.fixture_path)
        self.run_sites_resources_tests(crawler, PRAGMAR_SITE_ID, EXAMPLE_SITE_ID)

    def test_interrobot_images(self):
        """
        Test InterroBot-specific image handling and thumbnails.
        """
        crawler = InterroBotCrawler(self.fixture_path)
        self.run_pragmar_image_tests(crawler, PRAGMAR_SITE_ID)

    def test_interrobot_random_sort(self):
        """
        Test random sort functionality using the '?' sort parameter.
        """
        crawler = InterroBotCrawler(self.fixture_path)
        self.run_pragmar_sort_tests(crawler, PRAGMAR_SITE_ID)

    def test_interrobot_content_parsing(self):
        """
        Test content type detection and parsing.
        """
        crawler = InterroBotCrawler(self.fixture_path)
        self.run_pragmar_content_tests(crawler, PRAGMAR_SITE_ID, False)

    def test_interrobot_mcp_features(self):
        """
        Test InterroBot-specific MCP tool functionality.
        """
        crawler = InterroBotCrawler(self.fixture_path)
        list_tools_result = asyncio.run(crawler.mcp_list_tools())
        self.assertIsNotNone(list_tools_result)

    def test_thumbnails_sync(self):
        """
        Test thumbnail generation functionality (InterroBot-specific).

        Synchronous entry point that drives the async thumbnail check
        to completion with asyncio.run().
        """
        asyncio.run(self.__test_thumbnails())

    async def __test_thumbnails(self):
        """
        Test thumbnails are a special case for InterroBot

        Calls the resources MCP tool with the "thumbnails" extra and, when
        the response holds more than one item, checks that the second item
        is image content.
        """
        crawler = InterroBotCrawler(self.fixture_path)
        thumbnail_args = {
            "datasrc": crawler.datasrc,
            "sites": [PRAGMAR_SITE_ID],
            "extras": ["thumbnails"],
            "query": "type: img AND url: *.png",
            "limit": 4,
        }
        thumbnail_result: list[TextContent | ImageContent | EmbeddedResource] = await crawler.mcp_call_tool(
            RESOURCES_TOOL_NAME, thumbnail_args
        )
        # NOTE(review): the assertion is skipped when the response has a single
        # item; confirm whether the fixture is expected to always yield images.
        if len(thumbnail_result) > 1:
            self.assertEqual(
                thumbnail_result[1].type,
                "image",
                "ImageContent should be included in thumbnails response"
            )

    def test_interrobot_advanced_site_features(self):
        """
        Test InterroBot-specific site features like robots field.
        """
        crawler = InterroBotCrawler(self.fixture_path)

        # robots field retrieval
        # NOTE(review): assertions only run when the query returns results.
        site_one_field_json = crawler.get_sites_api(ids=[EXAMPLE_SITE_ID], fields=["robots"])
        if site_one_field_json.total > 0:
            result_dict = site_one_field_json._results[0].to_dict()
            self.assertIn("robots", result_dict, "robots field should be present in response")

        # multiple custom fields
        site_multiple_fields_json = crawler.get_sites_api(ids=[EXAMPLE_SITE_ID], fields=["robots", "created"])
        if site_multiple_fields_json.total > 0:
            result = site_multiple_fields_json._results[0].to_dict()
            self.assertIn("robots", result, "robots field should be present in response")
            self.assertIn("created", result, "created field should be present in response")

    def test_report(self):
        """
        Run the pragmar report for the InterroBot crawler and log its output.
        """
        crawler = InterroBotCrawler(self.fixture_path)
        logger.info(self.run_pragmar_report(crawler, PRAGMAR_SITE_ID, "InterroBot"))