Implement performance improvements for memory caching, HTTP client reuse, and regex optimization

Co-authored-by: Kenearos <86194771+Kenearos@users.noreply.github.com>
2026-01-27 17:33:17 +00:00 · 2026-01-27 17:33:17 +00:00 · b72cd9db1c
commit b72cd9db1c
parent 860e4d5027
5 changed files with 451 additions and 246 deletions
--- a/utils.py
+++ b/utils.py
@@ -20,8 +20,22 @@ class MentionDetector:
        # Create patterns for various mention formats
        # Include bot name and all nicknames
        all_names = [bot_name] + self.nicknames
-        self.patterns = []

+        # Build a single combined pattern for efficient matching (uses alternation)
+        name_alternatives = "|".join(re.escape(name) for name in all_names)
+
+        # Combined pattern that matches any mention format
+        # This is more efficient than checking multiple patterns separately
+        combined_mention_pattern = (
+            rf"(?:@(?:{name_alternatives})\b)|"  # @name
+            rf"(?:\b(?:{name_alternatives})[:!?.,])|"  # name: name! etc.
+            rf"(?:^(?:{name_alternatives})\b)|"  # name at start
+            rf"(?:\b(?:{name_alternatives})\b)"  # name anywhere
+        )
+        self._mention_pattern = re.compile(combined_mention_pattern, re.IGNORECASE)
+
+        # Keep individual patterns list for backward compatibility (tests may use it)
+        self.patterns = []
        for name in all_names:
            self.patterns.extend([
                rf"@{name}\b",  # @name (with word boundary)
@@ -29,12 +43,21 @@ class MentionDetector:
                rf"^{name}\b",  # name at start of message
                rf"\b{name}\b",  # name anywhere as whole word
            ])
-
-        # Case-insensitive compilation
        self.compiled_patterns = [
            re.compile(pattern, re.IGNORECASE) for pattern in self.patterns
        ]

+        # Pre-compile extraction patterns once per name, with the name re.escape()d (saves the per-call pattern-cache lookup that re.sub() with a string pattern performs)
+        self._extraction_patterns = {}
+        for name in all_names:
+            escaped_name = re.escape(name)
+            self._extraction_patterns[name.lower()] = {
+                'at_start': re.compile(rf"^@{escaped_name}\b[,:]?\s*", re.IGNORECASE),
+                'name_start': re.compile(rf"^{escaped_name}\b[,:]?\s*", re.IGNORECASE),
+                'name_end': re.compile(rf"\s*\b{escaped_name}[,!?.]?\s*$", re.IGNORECASE),
+                'name_middle': re.compile(rf"\s*\b{escaped_name}[,:!?]\s*", re.IGNORECASE),
+            }
+
        # Patterns for ambiguous greetings (might be directed at bot)
        self.greeting_patterns = [
            r"^(hi|hey|hallo|hello|servus|moin)(\s|$|\W)",
@@ -78,10 +101,8 @@ class MentionDetector:
        if not message:
            return False

-        for pattern in self.compiled_patterns:
-            if pattern.search(message):
-                return True
-        return False
+        # Use the optimized single combined pattern for faster matching
+        return bool(self._mention_pattern.search(message))

    def is_ambiguous_greeting(self, message):
        """
@@ -122,18 +143,21 @@ class MentionDetector:
        content = message
        all_names = [self.bot_name] + self.nicknames

+        # Use pre-compiled patterns for better performance
        for name in all_names:
-            # Remove @mention at start
-            content = re.sub(rf"^@{name}\b[,:]?\s*", "", content, flags=re.IGNORECASE)
+            patterns = self._extraction_patterns.get(name.lower())
+            if patterns:
+                # Remove @mention at start
+                content = patterns['at_start'].sub("", content)

-            # Remove name at start with optional punctuation
-            content = re.sub(rf"^{name}\b[,:]?\s*", "", content, flags=re.IGNORECASE)
+                # Remove name at start with optional punctuation
+                content = patterns['name_start'].sub("", content)

-            # Remove name at end with optional punctuation
-            content = re.sub(rf"\s*\b{name}[,!?.]?\s*$", "", content, flags=re.IGNORECASE)
+                # Remove name at end with optional punctuation
+                content = patterns['name_end'].sub("", content)

-            # Remove name in middle with punctuation
-            content = re.sub(rf"\s*\b{name}[,:!?]\s*", " ", content, flags=re.IGNORECASE)
+                # Remove name in middle with punctuation
+                content = patterns['name_middle'].sub(" ", content)

        return content.strip()