From 84308bf8128feb5d34c0699bf5638678426cbe0a Mon Sep 17 00:00:00 2001 From: Voronin Sergei Date: Sun, 31 May 2026 17:21:18 +1200 Subject: [PATCH] Add Slack message reference parsing utilities --- docs/english/concepts/message-listening.md | 19 +- slack_bolt/message_references.py | 189 ++++++++++++++++++++ tests/slack_bolt/test_message_references.py | 158 ++++++++++++++++ 3 files changed, 365 insertions(+), 1 deletion(-) create mode 100644 slack_bolt/message_references.py create mode 100644 tests/slack_bolt/test_message_references.py diff --git a/docs/english/concepts/message-listening.md b/docs/english/concepts/message-listening.md index be6e74678..d70ae81df 100644 --- a/docs/english/concepts/message-listening.md +++ b/docs/english/concepts/message-listening.md @@ -27,4 +27,21 @@ def say_hello_regex(say, context): # regular expression matches are inside of context.matches greeting = context['matches'][0] say(f"{greeting}, how are you?") -``` \ No newline at end of file +``` + +## Parsing references in message text + +Retrieved Slack messages can contain mrkdwn references such as user mentions, channel links, user group mentions, dates, and URLs. To parse these references from a message's `text` value, use `parse_slack_references()`: + +```python +from slack_bolt.message_references import parse_slack_references + + +@app.message("hello") +def handle_message(message, say): + references = parse_slack_references(message["text"]) + user_ids = [ref.id for ref in references if ref.type == "user"] + say(f"Found {len(user_ids)} user mention(s)") +``` + +The parser only extracts the IDs and labels present in the text. To retrieve the latest names or other entity details, use the Web API methods such as `users.info`, `conversations.info`, or `usergroups.list`. 
# ---------------------------------------------------------------------------
# slack_bolt/message_references.py
# ---------------------------------------------------------------------------
"""Utilities for parsing Slack references in message text."""

import re
from dataclasses import dataclass
from typing import List, Optional, Tuple

USER_REFERENCE = "user"
CHANNEL_REFERENCE = "channel"
USERGROUP_REFERENCE = "usergroup"
SPECIAL_MENTION_REFERENCE = "special_mention"
DATE_REFERENCE = "date"
LINK_REFERENCE = "link"
UNKNOWN_REFERENCE = "unknown"

# One "<...>" span; the content may not contain "<", ">" or a newline.
_ANGLE_BRACKET_REFERENCE_PATTERN = re.compile(r"<([^<>\n]+)>")
_SPECIAL_MENTION_NAMES = {"here", "channel", "everyone"}
_USERGROUP_PREFIX = "!subteam^"
_DATE_PREFIX = "!date^"

__all__ = [
    "CHANNEL_REFERENCE",
    "DATE_REFERENCE",
    "LINK_REFERENCE",
    "SPECIAL_MENTION_REFERENCE",
    "UNKNOWN_REFERENCE",
    "USER_REFERENCE",
    "USERGROUP_REFERENCE",
    "SlackMessageReference",
    "extract_channel_ids",
    "extract_user_ids",
    "extract_usergroup_ids",
    "parse_slack_references",
]


@dataclass(frozen=True)
class SlackMessageReference:
    """A Slack mrkdwn reference found inside a message text string.

    Attributes:
        type: One of the ``*_REFERENCE`` constants defined in this module.
        raw: The matched text, including the surrounding angle brackets.
        start: Offset of ``raw`` in the parsed text (``text[start:end] == raw``).
        end: Offset one past the last character of ``raw``.
        id: The user, channel, or user group ID; ``None`` for other types.
        label: The text after the first ``|``, if any (not set for dates).
        url: The link target, or the optional link of a date reference.
        special_mention: ``"here"``, ``"channel"``, or ``"everyone"``.
        timestamp: The timestamp element of a date reference, kept as a string.
        date_format: The format element of a date reference.
        fallback: The text after the first ``|`` of a date reference.
    """

    type: str
    raw: str
    start: int
    end: int
    id: Optional[str] = None
    label: Optional[str] = None
    url: Optional[str] = None
    special_mention: Optional[str] = None
    timestamp: Optional[str] = None
    date_format: Optional[str] = None
    fallback: Optional[str] = None


def parse_slack_references(text: Optional[str]) -> List[SlackMessageReference]:
    """Parses Slack mrkdwn references such as user mentions, channel links, and URLs.

    This helper only parses the syntax present in the text. It does not resolve
    IDs to their latest names; use the Slack Web API for current entity data.

    Args:
        text: The message text. ``None`` and ``""`` are accepted so that a
            missing or empty text value can be passed through unchanged.

    Returns:
        One reference per ``<...>`` span, in order of appearance. Spans whose
        content is not recognized are returned with type ``UNKNOWN_REFERENCE``;
        spans without a ``@``, ``#``, or ``!`` prefix are returned as links.
    """
    if not text:
        return []
    return [_to_slack_message_reference(match) for match in _ANGLE_BRACKET_REFERENCE_PATTERN.finditer(text)]


def extract_user_ids(text: Optional[str]) -> List[str]:
    """Returns the IDs of all user mentions in ``text``, in order of appearance."""
    return _extract_ids(text, USER_REFERENCE)


def extract_channel_ids(text: Optional[str]) -> List[str]:
    """Returns the IDs of all channel references in ``text``, in order of appearance."""
    return _extract_ids(text, CHANNEL_REFERENCE)


def extract_usergroup_ids(text: Optional[str]) -> List[str]:
    """Returns the IDs of all user group mentions in ``text``, in order of appearance."""
    return _extract_ids(text, USERGROUP_REFERENCE)


def _extract_ids(text: Optional[str], reference_type: str) -> List[str]:
    """Collects the non-``None`` IDs of every reference of ``reference_type``."""
    return [ref.id for ref in parse_slack_references(text) if ref.type == reference_type and ref.id is not None]


def _unknown_reference(
    *,
    raw: str,
    start: int,
    end: int,
    label: Optional[str] = None,
    fallback: Optional[str] = None,
) -> SlackMessageReference:
    """Builds the result used for spans that look like a reference but are malformed."""
    return SlackMessageReference(
        type=UNKNOWN_REFERENCE,
        raw=raw,
        start=start,
        end=end,
        label=label,
        fallback=fallback,
    )


def _to_slack_message_reference(match: re.Match) -> SlackMessageReference:
    """Converts one regex match into a reference, dispatching on the target prefix."""
    raw = match.group(0)
    start, end = match.span()
    target, label = _split_label(match.group(1))

    if target.startswith("@"):
        return _to_user_reference(target=target, label=label, raw=raw, start=start, end=end)

    if target.startswith("#"):
        return _to_channel_reference(target=target, label=label, raw=raw, start=start, end=end)

    # The two specific "!" forms must be tested before the generic "!" branch.
    if target.startswith(_USERGROUP_PREFIX):
        return _to_usergroup_reference(target=target, label=label, raw=raw, start=start, end=end)

    if target.startswith(_DATE_PREFIX):
        return _to_date_reference(target=target, fallback=label, raw=raw, start=start, end=end)

    if target.startswith("!"):
        return _to_special_mention_reference(target=target, label=label, raw=raw, start=start, end=end)

    return SlackMessageReference(type=LINK_REFERENCE, raw=raw, start=start, end=end, url=target, label=label)


def _to_user_reference(
    *,
    target: str,
    label: Optional[str],
    raw: str,
    start: int,
    end: int,
) -> SlackMessageReference:
    """Parses ``@<id>``; only IDs starting with ``U`` or ``W`` count as users."""
    user_id = target[1:]
    if not user_id.startswith(("U", "W")):
        return _unknown_reference(raw=raw, start=start, end=end, label=label)
    return SlackMessageReference(type=USER_REFERENCE, raw=raw, start=start, end=end, id=user_id, label=label)


def _to_channel_reference(
    *,
    target: str,
    label: Optional[str],
    raw: str,
    start: int,
    end: int,
) -> SlackMessageReference:
    """Parses ``#<id>``; any non-empty ID is accepted."""
    channel_id = target[1:]
    if not channel_id:
        return _unknown_reference(raw=raw, start=start, end=end, label=label)
    return SlackMessageReference(type=CHANNEL_REFERENCE, raw=raw, start=start, end=end, id=channel_id, label=label)


def _to_usergroup_reference(
    *,
    target: str,
    label: Optional[str],
    raw: str,
    start: int,
    end: int,
) -> SlackMessageReference:
    """Parses ``!subteam^<id>``; any non-empty ID is accepted."""
    usergroup_id = target[len(_USERGROUP_PREFIX) :]
    if not usergroup_id:
        return _unknown_reference(raw=raw, start=start, end=end, label=label)
    return SlackMessageReference(type=USERGROUP_REFERENCE, raw=raw, start=start, end=end, id=usergroup_id, label=label)


def _to_special_mention_reference(
    *,
    target: str,
    label: Optional[str],
    raw: str,
    start: int,
    end: int,
) -> SlackMessageReference:
    """Parses ``!here``, ``!channel``, and ``!everyone``; other names are unknown."""
    name = target[1:]
    if name not in _SPECIAL_MENTION_NAMES:
        return _unknown_reference(raw=raw, start=start, end=end, label=label)
    return SlackMessageReference(
        type=SPECIAL_MENTION_REFERENCE,
        raw=raw,
        start=start,
        end=end,
        label=label,
        special_mention=name,
    )


def _to_date_reference(
    *,
    target: str,
    fallback: Optional[str],
    raw: str,
    start: int,
    end: int,
) -> SlackMessageReference:
    """Parses ``!date^<timestamp>^<format>[^<link>]``.

    The timestamp and format must both be non-empty; otherwise the span is
    reported as unknown.
    """
    # maxsplit=3 keeps any "^" inside the optional link intact.
    elements = target.split("^", 3)
    if len(elements) < 3 or not elements[1] or not elements[2]:
        return _unknown_reference(raw=raw, start=start, end=end, fallback=fallback)

    url = elements[3] if len(elements) > 3 and elements[3] else None
    return SlackMessageReference(
        type=DATE_REFERENCE,
        raw=raw,
        start=start,
        end=end,
        timestamp=elements[1],
        date_format=elements[2],
        url=url,
        fallback=fallback,
    )


def _split_label(value: str) -> Tuple[str, Optional[str]]:
    """Splits ``target|label`` at the first ``|``; the label is ``None`` when there is no ``|``."""
    target, separator, label = value.partition("|")
    return target, (label if separator else None)


# ---------------------------------------------------------------------------
# tests/slack_bolt/test_message_references.py
#
# In the project layout this section is a separate file that imports the
# public names above from slack_bolt.message_references; here both parts
# share one namespace, so that import is omitted.
# ---------------------------------------------------------------------------


def _located(text: str, raw: str) -> dict:
    """Returns the raw/start/end keyword arguments for ``raw`` as found in ``text``."""
    start = text.index(raw)
    return {"raw": raw, "start": start, "end": start + len(raw)}


class TestMessageReferences:
    def test_parse_slack_references(self):
        text = (
            "Hi <@U01V09UNAJZ|some_user>, join <#C123ABC456|general>, "
            "ping <!subteam^SAZ94GDB8|ops>, <!here>, see <https://example.com|docs>, "
            "email <mailto:perihelion@example.com|Email Perihelion>, "
            "and note <!date^1392734382^{date_short}^https://example.com/|Feb 18, 2014 PST>."
        )

        references = parse_slack_references(text)

        assert references == [
            SlackMessageReference(
                type=USER_REFERENCE,
                id="U01V09UNAJZ",
                label="some_user",
                **_located(text, "<@U01V09UNAJZ|some_user>"),
            ),
            SlackMessageReference(
                type=CHANNEL_REFERENCE,
                id="C123ABC456",
                label="general",
                **_located(text, "<#C123ABC456|general>"),
            ),
            SlackMessageReference(
                type=USERGROUP_REFERENCE,
                id="SAZ94GDB8",
                label="ops",
                **_located(text, "<!subteam^SAZ94GDB8|ops>"),
            ),
            SlackMessageReference(
                type=SPECIAL_MENTION_REFERENCE,
                special_mention="here",
                **_located(text, "<!here>"),
            ),
            SlackMessageReference(
                type=LINK_REFERENCE,
                url="https://example.com",
                label="docs",
                **_located(text, "<https://example.com|docs>"),
            ),
            SlackMessageReference(
                type=LINK_REFERENCE,
                url="mailto:perihelion@example.com",
                label="Email Perihelion",
                **_located(text, "<mailto:perihelion@example.com|Email Perihelion>"),
            ),
            SlackMessageReference(
                type=DATE_REFERENCE,
                timestamp="1392734382",
                date_format="{date_short}",
                url="https://example.com/",
                fallback="Feb 18, 2014 PST",
                **_located(text, "<!date^1392734382^{date_short}^https://example.com/|Feb 18, 2014 PST>"),
            ),
        ]

        for reference in references:
            assert text[reference.start : reference.end] == reference.raw

    def test_parse_slack_references_without_labels(self):
        text = "<@W123> <#G123> <!subteam^S123> <https://example.com>"

        references = parse_slack_references(text)

        assert references == [
            SlackMessageReference(type=USER_REFERENCE, raw="<@W123>", start=0, end=7, id="W123"),
            SlackMessageReference(type=CHANNEL_REFERENCE, raw="<#G123>", start=8, end=15, id="G123"),
            SlackMessageReference(type=USERGROUP_REFERENCE, raw="<!subteam^S123>", start=16, end=31, id="S123"),
            SlackMessageReference(
                type=LINK_REFERENCE,
                raw="<https://example.com>",
                start=32,
                end=53,
                url="https://example.com",
            ),
        ]

    def test_extract_ids(self):
        text = "<@U111> <@W222|person> <#C111|general> <!subteam^S111|ops>"

        assert extract_user_ids(text) == ["U111", "W222"]
        assert extract_channel_ids(text) == ["C111"]
        assert extract_usergroup_ids(text) == ["S111"]

    def test_parse_unknown_references(self):
        text = "<@B123|bot> <!unknown|label> <!date^^{date_short}|fallback>"

        references = parse_slack_references(text)

        assert references == [
            SlackMessageReference(type=UNKNOWN_REFERENCE, raw="<@B123|bot>", start=0, end=11, label="bot"),
            SlackMessageReference(type=UNKNOWN_REFERENCE, raw="<!unknown|label>", start=12, end=28, label="label"),
            SlackMessageReference(
                type=UNKNOWN_REFERENCE,
                raw="<!date^^{date_short}|fallback>",
                start=29,
                end=59,
                fallback="fallback",
            ),
        ]

    def test_parse_date_reference_with_caret_in_url(self):
        text = "<!date^1392734382^{date_short}^https://example.com/a^b|Feb 18, 2014 PST>"

        references = parse_slack_references(text)

        assert references == [
            SlackMessageReference(
                type=DATE_REFERENCE,
                raw=text,
                start=0,
                end=len(text),
                timestamp="1392734382",
                date_format="{date_short}",
                url="https://example.com/a^b",
                fallback="Feb 18, 2014 PST",
            )
        ]

    def test_does_not_parse_escaped_or_multiline_angle_brackets(self):
        # The first mention is HTML-escaped (entity text, not real angle
        # brackets) and the second spans a newline; only the third is parsed.
        text = "&lt;@U111&gt; <@U222\n> <@U333>"

        assert parse_slack_references(text) == [
            SlackMessageReference(type=USER_REFERENCE, raw="<@U333>", start=23, end=30, id="U333")
        ]

    def test_parse_empty_or_missing_text(self):
        assert parse_slack_references("") == []
        assert parse_slack_references(None) == []
        assert extract_user_ids(None) == []