Fix text extraction utility and TTS

2024-12-21 23:33:22 +01:00 · 2024-12-21 23:33:22 +01:00 · e20976adb5
commit e20976adb5
parent 67cc797010
5 changed files with 27 additions and 14 deletions
--- a/js/complete.min.js
+++ b/js/complete.min.js
--- a/js/tts.min.js
+++ b/js/tts.min.js
--- a/js/utility.min.js
+++ b/js/utility.min.js
--- a/src/js/tts.js
+++ b/src/js/tts.js
@@ -283,7 +283,7 @@ function fcn_readTextStack() {
      current.classList.remove('current-reading');
    }

-    _$$$(fcn_currentReadingId).classList.add('current-reading');
+    _$$$(fcn_currentReadingId)?.classList.add('current-reading');
  }

  fcn_utter.text = fcn_ttsCurrentText;
@@ -333,8 +333,7 @@ if (typeof speechSynthesis !== 'undefined' && fcn_ttsInterface) {
    // Prepare items to read
    fcn_ttsStack = fcn_ttsStack.flatMap(node => {
      const result = [];
-      const inner = node.querySelector('.paragraph-inner');
-      const text = inner ? inner.textContent : node.textContent;
+      const text = FcnUtils.extractTextNodes(node);

      // Split text into array of sentences using a regex pattern
      const sentences = text.replace(regex, '$1|').split('|');
--- a/src/js/utility.js
+++ b/src/js/utility.js
@@ -759,14 +759,28 @@ const FcnUtils = {
   *
   * @since 5.27.0
   * @param {HTMLElement} element - The element.
+   * @param {Set<String>} allowedTags - Set of allowed tag names.
   * @return {String} Extracted text or empty string.
   */

-  extractTextNodes(element) {
-    return Array.from(element.childNodes)
-      .filter(node => node.nodeType === Node.TEXT_NODE)
-      .map(node => node.textContent.trim())
-      .join(' ');
+  extractTextNodes(element, allowedTags = new Set(['strong', 'b', 'em', 'i', 'u', 'code', 'a', 's', 'kbd', 'sub', 'sup', 'span', 'label', 'button', 'ins', 'del', 'small', 'mark', 'q', 'abbr', 'time', 'cite'])) {
+    // Normalize only once at the end: trimming per nested tag would fuse words ("a<b> b </b>c" -> "abc").
+    const collect = parent => {
+      let text = '';
+      for (const node of parent.childNodes) {
+        if (node.nodeType === Node.TEXT_NODE) {
+          text += node.textContent;
+        } else if (node.nodeType === Node.ELEMENT_NODE) {
+          const tagName = node.tagName.toLowerCase();
+          // <br> counts as a space; elements whose tag is not in allowedTags are skipped with their content.
+          if (tagName === 'br' || allowedTags.has(tagName)) {
+            text += tagName === 'br' ? ' ' : collect(node);
+          }
+        }
+      }
+      return text;
+    };
+    return collect(element).replace(/\s+/g, ' ').trim(); // Collapses all whitespace runs, including line breaks.
  },

  /**