Text Splitting Function Refactor

Created on: September 28, 2024

Answered using OpenAI o1-preview by OpenAI01

Question

/**
 * Splits a text node into chunks whose length, plus a budget of 2 characters
 * for every bold range overlapping the chunk, does not exceed 32.
 *
 * Break points are chosen in this order: just after the last newline in the
 * window, just after the last space in the window, otherwise a hard cut at
 * the window edge. The chunk is then shrunk one character at a time until it
 * fits the budget (but never below one character, so the loop always
 * advances).
 *
 * NOTE: each chunk's `value` is trimmed, but the bold offsets are computed
 * relative to the untrimmed slice. When trimming removes leading whitespace,
 * the offsets are therefore shifted relative to `value`.
 *
 * @param {{value: string, formatting?: {bold?: Array<{begin: number, end: number}>}}} node
 *   Node to split. It is not modified, and neither is its `formatting.bold` array.
 * @returns {Array<object>} A single shallow copy of `node` when it already
 *   fits; otherwise one shallow copy per chunk with `value` replaced by the
 *   trimmed chunk text and `formatting` replaced by `{ bold: [...] }` holding
 *   the overlapping ranges re-based to the chunk start.
 */
function splitText(node) {
  const maxLength = 32;
  const text = node.value;
  const sourceRanges = node.formatting?.bold || [];
  const result = [];

  if (text.length + sourceRanges.length * 2 <= maxLength) {
    result.push({ ...node });
    return result;
  }

  // Array.prototype.sort works in place; sort a copy so the caller's
  // node.formatting.bold array keeps its original order.
  const formattingObjects = [...sourceRanges].sort((a, b) => a.begin - b.begin);

  let startIndex = 0;

  while (startIndex < text.length) {
    const possibleEndIndex = Math.min(startIndex + maxLength, text.length);
    let endIndex = possibleEndIndex;

    // Overhead of every bold range overlapping the full candidate window.
    // The early `break` relies on the ranges being sorted by `begin`.
    let windowOverhead = 0;
    for (const fmt of formattingObjects) {
      if (fmt.end <= startIndex) {
        continue;
      }
      if (fmt.begin >= possibleEndIndex) {
        break;
      }
      windowOverhead += 2;
    }

    // Only look for a soft break when the remaining text does not fit whole.
    if (text.length - startIndex + windowOverhead > maxLength) {
      const newlineIndex = text.lastIndexOf('\n', possibleEndIndex - 1);
      if (newlineIndex > startIndex) {
        endIndex = newlineIndex + 1;
      } else {
        const spaceIndex = text.lastIndexOf(' ', possibleEndIndex - 1);
        if (spaceIndex > startIndex) {
          endIndex = spaceIndex + 1;
        }
      }
    }

    // Shrink the chunk until text plus formatting overhead fits the budget.
    while (true) {
      let chunkOverhead = 0;
      for (const fmt of formattingObjects) {
        if (fmt.end <= startIndex) {
          continue;
        }
        if (fmt.begin >= endIndex) {
          break;
        }
        chunkOverhead += 2;
      }

      if (endIndex - startIndex + chunkOverhead <= maxLength) {
        break;
      }
      endIndex--;
      if (endIndex <= startIndex) {
        // Always consume at least one character to guarantee progress.
        endIndex = startIndex + 1;
        break;
      }
    }

    const newNode = { ...node };
    newNode.value = text.substring(startIndex, endIndex).trim();

    newNode.formatting = { bold: [] };
    for (const fmt of formattingObjects) {
      if (fmt.end <= startIndex || fmt.begin >= endIndex) {
        continue;
      }
      // Clip the range to the chunk and re-base it to the chunk start.
      const newFmt = {};
      newFmt.begin = Math.max(fmt.begin, startIndex) - startIndex;
      newFmt.end = Math.min(fmt.end, endIndex) - startIndex;
      newNode.formatting.bold.push(newFmt);
    }

    result.push(newNode);

    startIndex = endIndex;
  }

  return result;
}

Subdivide this code into multiple JS functions without breaking the logic.

Thought

Breaking down the function

I’m streamlining the JavaScript function by identifying logical blocks to create separate, smaller functions, while keeping the original logic intact.

Segmenting the text

I’m breaking down the text into manageable chunks, each no longer than 32 characters, while accounting for formatting overhead like bold markers. This ensures clarity and prevents exceeding the character limit.

Formatting and segmenting

I’m examining how the text is broken into segments, factoring in formatting costs like bold ranges, and ensuring each segment respects the maximum length constraint.

I’m refactoring the code structure by adding helper functions that calculate overhead, adjust endIndex, and extract the substring and formatting, so that totalLength stays within the maxLength constraint.

Crafting helper functions

I’m assembling helper functions to refine splitText for oversized text handling. These functions calculate overhead, adjust end indices, and extract substrings with formatting.

Answer

Certainly! Below is the refactored code, subdivided into multiple functions while preserving the original logic:

/**
 * Splits a text node into chunks whose length, plus a budget of 2 characters
 * for every bold range overlapping the chunk, does not exceed 32.
 *
 * @param {{value: string, formatting?: {bold?: Array<{begin: number, end: number}>}}} node
 *   Node to split. It is not modified, and neither is its `formatting.bold` array.
 * @returns {Array<object>} A single shallow copy of `node` when it already
 *   fits; otherwise one node per chunk, in text order.
 */
function splitText(node) {
  const maxLength = 32;
  const text = node.value;
  const sourceRanges = node.formatting?.bold || [];
  const result = [];

  if (text.length + sourceRanges.length * 2 <= maxLength) {
    result.push({ ...node });
    return result;
  }

  // Array.prototype.sort works in place; sort a copy so the caller's
  // node.formatting.bold array keeps its original order. The returned
  // chunks are the same as when sorting in place.
  const formattingObjects = [...sourceRanges].sort((a, b) => a.begin - b.begin);

  let startIndex = 0;
  while (startIndex < text.length) {
    const possibleEndIndex = Math.min(startIndex + maxLength, text.length);
    const endIndex = adjustEndIndex(text, formattingObjects, startIndex, possibleEndIndex, maxLength);
    result.push(extractSubstringAndFormatting(node, text, formattingObjects, startIndex, endIndex));
    startIndex = endIndex;
  }

  return result;
}

/**
 * Counts the formatting budget for the range [startIndex, endIndex): 2 for
 * every formatting object that overlaps it.
 *
 * @param {Array<{begin: number, end: number}>} formattingObjects
 *   Ranges sorted by ascending `begin`; the early `break` depends on it.
 * @param {number} startIndex Inclusive start of the range.
 * @param {number} endIndex Exclusive end of the range.
 * @returns {number} Total overhead in characters.
 */
function calculateOverhead(formattingObjects, startIndex, endIndex) {
  let overhead = 0;
  for (const fmt of formattingObjects) {
    if (fmt.end <= startIndex) {
      continue;
    }
    if (fmt.begin >= endIndex) {
      break;
    }
    overhead += 2;
  }
  return overhead;
}

/**
 * Picks where the chunk starting at `startIndex` should end.
 *
 * When the remaining text does not fit whole, the cut is placed just after
 * the last newline in the window, else just after the last space, else at
 * the window edge. The end is then moved back one character at a time until
 * the chunk plus its formatting overhead fits `maxLength`.
 *
 * @param {string} text Full text being split.
 * @param {Array<{begin: number, end: number}>} formattingObjects Ranges sorted by `begin`.
 * @param {number} startIndex Inclusive start of the chunk.
 * @param {number} possibleEndIndex Exclusive upper bound for the chunk end.
 * @param {number} maxLength Budget for chunk length plus overhead.
 * @returns {number} Exclusive end index, always greater than `startIndex`
 *   when `possibleEndIndex` is.
 */
function adjustEndIndex(text, formattingObjects, startIndex, possibleEndIndex, maxLength) {
  let endIndex = possibleEndIndex;

  const windowOverhead = calculateOverhead(formattingObjects, startIndex, possibleEndIndex);
  if (text.length - startIndex + windowOverhead > maxLength) {
    const newlineIndex = text.lastIndexOf('\n', possibleEndIndex - 1);
    if (newlineIndex > startIndex) {
      endIndex = newlineIndex + 1;
    } else {
      const spaceIndex = text.lastIndexOf(' ', possibleEndIndex - 1);
      if (spaceIndex > startIndex) {
        endIndex = spaceIndex + 1;
      }
    }
  }

  while (endIndex - startIndex + calculateOverhead(formattingObjects, startIndex, endIndex) > maxLength) {
    endIndex--;
    if (endIndex <= startIndex) {
      // Always consume at least one character so the caller's loop advances.
      endIndex = startIndex + 1;
      break;
    }
  }

  return endIndex;
}

/**
 * Builds the node for the chunk [startIndex, endIndex).
 *
 * NOTE: `value` is trimmed, but the bold offsets are computed relative to the
 * untrimmed slice. When trimming removes leading whitespace, the offsets are
 * therefore shifted relative to `value`.
 *
 * @param {object} node Source node; its own properties are shallow-copied.
 * @param {string} text Full text being split.
 * @param {Array<{begin: number, end: number}>} formattingObjects Bold ranges over `text`.
 * @param {number} startIndex Inclusive start of the chunk.
 * @param {number} endIndex Exclusive end of the chunk.
 * @returns {object} Copy of `node` with `value` set to the trimmed chunk and
 *   `formatting` set to `{ bold: [...] }` holding the overlapping ranges,
 *   clipped to the chunk and re-based to its start.
 */
function extractSubstringAndFormatting(node, text, formattingObjects, startIndex, endIndex) {
  const newNode = { ...node };
  newNode.value = text.substring(startIndex, endIndex).trim();

  newNode.formatting = { bold: [] };
  for (const fmt of formattingObjects) {
    if (fmt.end <= startIndex || fmt.begin >= endIndex) {
      continue;
    }
    const newFmt = {};
    newFmt.begin = Math.max(fmt.begin, startIndex) - startIndex;
    newFmt.end = Math.min(fmt.end, endIndex) - startIndex;
    newNode.formatting.bold.push(newFmt);
  }

  return newNode;
}

Explanation:

  1. calculateOverhead: Computes the additional length introduced by formatting within a given text range.
  2. adjustEndIndex: Determines the optimal endIndex for splitting the text, considering the maxLength, formatting overhead, and preferable breaking points (like newlines or spaces).
  3. extractSubstringAndFormatting: Extracts a substring from the original text and adjusts the formatting ranges accordingly to create a new node.
  4. splitText: Orchestrates the splitting process using the helper functions, ensuring that each piece adheres to the maxLength constraint while maintaining formatting integrity.

By modularizing the code, we've improved readability and maintainability without altering the original functionality.

Share this Q&A