Implement performance improvements for memory caching, HTTP client reuse, and regex optimization
Co-authored-by: Kenearos <86194771+Kenearos@users.noreply.github.com>
This commit is contained in:
parent
860e4d5027
commit
b72cd9db1c
5 changed files with 451 additions and 246 deletions
54
utils.py
54
utils.py
|
|
@ -20,8 +20,22 @@ class MentionDetector:
|
|||
# Create patterns for various mention formats
|
||||
# Include bot name and all nicknames
|
||||
all_names = [bot_name] + self.nicknames
|
||||
self.patterns = []
|
||||
|
||||
# Build a single combined pattern for efficient matching (uses alternation)
|
||||
name_alternatives = "|".join(re.escape(name) for name in all_names)
|
||||
|
||||
# Combined pattern that matches any mention format
|
||||
# This is more efficient than checking multiple patterns separately
|
||||
combined_mention_pattern = (
|
||||
rf"(?:@(?:{name_alternatives})\b)|" # @name
|
||||
rf"(?:\b(?:{name_alternatives})[:!?.,])|" # name: name! etc.
|
||||
rf"(?:^(?:{name_alternatives})\b)|" # name at start
|
||||
rf"(?:\b(?:{name_alternatives})\b)" # name anywhere
|
||||
)
|
||||
self._mention_pattern = re.compile(combined_mention_pattern, re.IGNORECASE)
|
||||
|
||||
# Keep individual patterns list for backward compatibility (tests may use it)
|
||||
self.patterns = []
|
||||
for name in all_names:
|
||||
self.patterns.extend([
|
||||
rf"@{name}\b", # @name (with word boundary)
|
||||
|
|
@ -29,12 +43,21 @@ class MentionDetector:
|
|||
rf"^{name}\b", # name at start of message
|
||||
rf"\b{name}\b", # name anywhere as whole word
|
||||
])
|
||||
|
||||
# Case-insensitive compilation
|
||||
self.compiled_patterns = [
|
||||
re.compile(pattern, re.IGNORECASE) for pattern in self.patterns
|
||||
]
|
||||
|
||||
# Pre-compile extraction patterns for each name (more efficient than recompiling each time)
|
||||
self._extraction_patterns = {}
|
||||
for name in all_names:
|
||||
escaped_name = re.escape(name)
|
||||
self._extraction_patterns[name.lower()] = {
|
||||
'at_start': re.compile(rf"^@{escaped_name}\b[,:]?\s*", re.IGNORECASE),
|
||||
'name_start': re.compile(rf"^{escaped_name}\b[,:]?\s*", re.IGNORECASE),
|
||||
'name_end': re.compile(rf"\s*\b{escaped_name}[,!?.]?\s*$", re.IGNORECASE),
|
||||
'name_middle': re.compile(rf"\s*\b{escaped_name}[,:!?]\s*", re.IGNORECASE),
|
||||
}
|
||||
|
||||
# Patterns for ambiguous greetings (might be directed at bot)
|
||||
self.greeting_patterns = [
|
||||
r"^(hi|hey|hallo|hello|servus|moin)(\s|$|\W)",
|
||||
|
|
@ -78,10 +101,8 @@ class MentionDetector:
|
|||
if not message:
|
||||
return False
|
||||
|
||||
for pattern in self.compiled_patterns:
|
||||
if pattern.search(message):
|
||||
return True
|
||||
return False
|
||||
# Use the optimized single combined pattern for faster matching
|
||||
return bool(self._mention_pattern.search(message))
|
||||
|
||||
def is_ambiguous_greeting(self, message):
|
||||
"""
|
||||
|
|
@ -122,18 +143,21 @@ class MentionDetector:
|
|||
content = message
|
||||
all_names = [self.bot_name] + self.nicknames
|
||||
|
||||
# Use pre-compiled patterns for better performance
|
||||
for name in all_names:
|
||||
# Remove @mention at start
|
||||
content = re.sub(rf"^@{name}\b[,:]?\s*", "", content, flags=re.IGNORECASE)
|
||||
patterns = self._extraction_patterns.get(name.lower())
|
||||
if patterns:
|
||||
# Remove @mention at start
|
||||
content = patterns['at_start'].sub("", content)
|
||||
|
||||
# Remove name at start with optional punctuation
|
||||
content = re.sub(rf"^{name}\b[,:]?\s*", "", content, flags=re.IGNORECASE)
|
||||
# Remove name at start with optional punctuation
|
||||
content = patterns['name_start'].sub("", content)
|
||||
|
||||
# Remove name at end with optional punctuation
|
||||
content = re.sub(rf"\s*\b{name}[,!?.]?\s*$", "", content, flags=re.IGNORECASE)
|
||||
# Remove name at end with optional punctuation
|
||||
content = patterns['name_end'].sub("", content)
|
||||
|
||||
# Remove name in middle with punctuation
|
||||
content = re.sub(rf"\s*\b{name}[,:!?]\s*", " ", content, flags=re.IGNORECASE)
|
||||
# Remove name in middle with punctuation
|
||||
content = patterns['name_middle'].sub(" ", content)
|
||||
|
||||
return content.strip()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue