Source code for mcp_server_webcrawl.interactive.highlights

import re
import curses

from dataclasses import dataclass
from typing import List

from mcp_server_webcrawl.interactive.ui import safe_addstr

@dataclass
class HighlightSpan:
    """
    A single highlighted region of a text.

    start and end are offsets into the text the span was computed for
    (used as slice bounds, i.e. end is exclusive), and text is the
    substring the span covers.
    """

    # offset of the first highlighted character
    start: int
    # offset one past the last highlighted character
    end: int
    # the highlighted substring itself
    text: str

    def __str__(self) -> str:
        # compact debugging form: [start:end 'text']
        rendered: str = f"[{self.start}:{self.end} '{self.text}']"
        return rendered
class HighlightProcessor:
    """
    Shared highlight processing utilities

    Every method is static; the class only groups the helpers and the
    compiled patterns they share.
    """

    # double-quoted phrase; group 1 is the text between the quotes
    QUOTED_PHRASE_PATTERN = re.compile(r'"([^"]+)"')
    # a run of word characters delimited by word boundaries
    WORD_PATTERN = re.compile(r"\b\w+\b")
    # **marked** snippet text; only letters, hyphen, underscore, apostrophe and
    # space are accepted between the markers
    # NOTE(review): marked text containing digits or other characters is not
    # recognised and keeps its ** markers - confirm this is intended
    SNIPPET_MARKER_PATTERN = re.compile(r"\*\*([a-zA-Z\-_' ]+)\*\*")
    # words never used as highlight terms (boolean operators and, presumably,
    # query field names - verify against the search syntax)
    IGNORE_WORDS = {"AND", "OR", "NOT", "and", "or", "not", "type", "status", "size", "url", "id"}

    @staticmethod
    def extract_search_terms(query: str) -> List[str]:
        """
        Extract search terms from query, handling quoted phrases and individual keywords.

        Quoted phrases come first (in query order), followed by the remaining
        individual words. Words of one or two characters and words listed in
        IGNORE_WORDS (compared case-insensitively) are dropped.

        Returns an empty list for an empty or whitespace-only query.
        """
        if not query or not query.strip():
            return []

        search_terms: List[str] = []
        for match in HighlightProcessor.QUOTED_PHRASE_PATTERN.finditer(query):
            phrase = match.group(1).strip()
            if phrase:
                search_terms.append(phrase)

        # replace phrases with a space, not "", so the words on either side of
        # a quoted phrase are not glued into one bogus term
        remaining_query = HighlightProcessor.QUOTED_PHRASE_PATTERN.sub(" ", query)

        # IGNORE_WORDS lists some casings only; compare lowercased so that
        # e.g. "And" or "URL" are ignored as well
        ignored = {word.lower() for word in HighlightProcessor.IGNORE_WORDS}

        # extract individual words
        for match in HighlightProcessor.WORD_PATTERN.finditer(remaining_query):
            word = match.group()
            if len(word) > 2 and word.lower() not in ignored:
                search_terms.append(word)

        return search_terms

    @staticmethod
    def find_highlights_in_text(text: str, search_terms: List[str]) -> List[HighlightSpan]:
        """
        Find all highlight spans in text for the given search terms.

        Matching is case-insensitive and anchored on word boundaries.
        Surrounding quote characters are stripped from each term; terms that
        are blank afterwards are ignored. Overlapping or touching matches are
        merged before being returned.

        Returns an empty list when text is empty or no usable term remains.
        """
        if not text or not search_terms:
            return []

        # strip quotes, drop blank terms (an empty alternative would match at
        # every word boundary) and case-insensitive duplicates
        unique_terms = {}
        for term in search_terms:
            cleaned = term.strip("\"'")
            if cleaned.strip():
                unique_terms.setdefault(cleaned.lower(), cleaned)
        if not unique_terms:
            return []

        # regex alternation takes the first alternative that matches, so try
        # longer terms first; otherwise "foo" would shadow the phrase "foo bar"
        ordered_terms = sorted(unique_terms.values(), key=len, reverse=True)
        alternation = "|".join(re.escape(term) for term in ordered_terms)
        pattern = re.compile(rf"\b({alternation})\b", re.IGNORECASE)

        highlights = [
            HighlightSpan(start=match.start(), end=match.end(), text=match.group())
            for match in pattern.finditer(text)
        ]
        return HighlightProcessor.merge_overlapping_highlights(highlights, text)

    @staticmethod
    def extract_snippet_highlights(snippet_text: str) -> tuple[str, List[HighlightSpan]]:
        """
        Extract highlights from snippet text with **markers**, returning clean text and highlights.

        Whitespace runs are collapsed to single spaces and the result is
        stripped. Span offsets refer to the returned clean text; spans left
        empty by the final strip are dropped.

        Returns ("", []) for empty input.
        """
        if not snippet_text:
            return "", []

        normalized_text = re.sub(r"\s+", " ", snippet_text.strip())

        parts: List[str] = []
        raw_spans = []  # (start, end) offsets into the unstripped clean text
        cursor = 0      # length of the clean text assembled so far
        last_end = 0

        for match in HighlightProcessor.SNIPPET_MARKER_PATTERN.finditer(normalized_text):
            # text before this match
            text_before = normalized_text[last_end:match.start()]
            parts.append(text_before)
            cursor += len(text_before)

            # highlighted text (without markers)
            highlight_text = match.group(1)
            parts.append(highlight_text)
            raw_spans.append((cursor, cursor + len(highlight_text)))
            cursor += len(highlight_text)

            last_end = match.end()

        # remaining text
        parts.append(normalized_text[last_end:])

        unstripped_text = "".join(parts)
        clean_text = unstripped_text.strip()

        # marked text may begin or end with a space (e.g. "** foo**"), so the
        # strip above can move or cut the text; re-base the offsets onto the
        # stripped text so that clean_text[start:end] == span.text holds
        shift = len(unstripped_text) - len(unstripped_text.lstrip())
        highlights: List[HighlightSpan] = []
        for raw_start, raw_end in raw_spans:
            start = max(raw_start - shift, 0)
            end = min(raw_end - shift, len(clean_text))
            if start < end:
                highlights.append(
                    HighlightSpan(start=start, end=end, text=clean_text[start:end])
                )

        return clean_text, highlights

    @staticmethod
    def merge_overlapping_highlights(highlights: List[HighlightSpan], text: str) -> List[HighlightSpan]:
        """
        Merge overlapping or adjacent highlight spans.

        Spans are processed in order of start offset. A span starting at or
        before the end of the previous one is folded into it, and the merged
        span's text is re-sliced from text. The input list is not modified.
        """
        if not highlights:
            return []

        merged: List[HighlightSpan] = []
        # sort by start position
        for highlight in sorted(highlights, key=lambda h: h.start):
            if merged and highlight.start <= merged[-1].end:
                # overlapping/adjacent - merge them
                last = merged[-1]
                end = max(last.end, highlight.end)
                merged[-1] = HighlightSpan(
                    start=last.start,
                    end=end,
                    text=text[last.start:end]
                )
            else:
                merged.append(highlight)

        return merged

    @staticmethod
    def render_text_with_highlights(
        stdscr: curses.window,
        text: str,
        highlights: List[HighlightSpan],
        x: int,
        y: int,
        max_width: int,
        normal_style: int,
        hit_style: int
    ) -> None:
        """
        Render text with highlights applied.

        Draws at most max_width characters of text on row y starting at
        column x, using hit_style for highlighted spans and normal_style for
        everything else. Nothing is drawn for blank text or a non-positive
        max_width. Spans may be unsorted or overlapping; each character is
        drawn once.
        """
        # a negative max_width would otherwise slice characters off the END of
        # the text and still draw the leading part
        if not text or not text.strip() or max_width <= 0:
            return

        display_text: str = text[:max_width]
        text_length: int = len(display_text)
        visible_highlights: List[HighlightSpan] = sorted(
            (h for h in (highlights or []) if h.start < text_length),
            key=lambda h: h.start
        )

        current_x: int = x
        pos: int = 0  # offset into display_text of the next undrawn character

        try:
            for highlight in visible_highlights:
                # clamp to what is still undrawn and to the visible text, so
                # overlapping spans never redraw or skip characters
                hit_start: int = max(highlight.start, pos)
                hit_end: int = min(highlight.end, text_length)
                if hit_end <= hit_start:
                    continue

                # text before highlight
                if hit_start > pos:
                    text_before: str = display_text[pos:hit_start]
                    safe_addstr(stdscr, y, current_x, text_before, normal_style)
                    current_x += len(text_before)

                # highlighted text
                highlighted_text: str = display_text[hit_start:hit_end]
                safe_addstr(stdscr, y, current_x, highlighted_text, hit_style)
                current_x += len(highlighted_text)
                pos = hit_end

            # remaining text
            if pos < text_length:
                safe_addstr(stdscr, y, current_x, display_text[pos:], normal_style)
        except curses.error:
            # drawing is best-effort: a curses failure abandons the rest of
            # the line instead of propagating to the caller
            pass