Markdown V2 Parser Code

Created on: September 28, 2024

Answered using OpenAI o1-preview by OpenAI01

Question

<file path="./package.json">
{
"name": "your-project",
"version": "1.0.0",
"type": "module",
"main": "index.js",
"scripts": {
"start": "node index.js"
},
"dependencies": {
"bcrypt": "^5.1.1",
"dotenv": "^16.4.5",
"express": "^4.21.0",
"html-entities": "^2.5.2",
"node-telegram-bot-api": "^0.66.0",
"redis": "^4.7.0",
"validator": "^13.12.0"
}
}
</file>
<file path="./config/config.js">
// Runtime configuration, read once from environment variables at import time.
const { PORT, BOT_TOKEN, BOT_USERNAME, REDIS_URL } = process.env

export const port = PORT || 8000
export const botToken = BOT_TOKEN
export const botUsername = BOT_USERNAME
export const redisUrl = REDIS_URL || 'redis://localhost:6379'

// Fail fast: importing this module throws when either bot credential is absent.
const hasBotCredentials = Boolean(botToken) && Boolean(botUsername)

if (!hasBotCredentials) {
	throw new Error('BOT_TOKEN and BOT_USERNAME must be set in environment variables.')
}
</file>
<file path="./middleware/validateSessionId.js">
/**
 * Express middleware that validates the `:sessionId` route parameter.
 *
 * The trimmed id must consist only of ASCII letters, digits, `_` or `-`.
 * On success the id is stored on `req.sessionId` and `next()` is called;
 * otherwise a 400 JSON error is sent and `next()` is not called.
 *
 * @param {object} req - Request; reads `req.params.sessionId`.
 * @param {object} res - Response; used only for the 400 reply.
 * @param {Function} next - Continuation invoked for a valid id.
 * @returns {*} The result of `res.json(...)` on failure, otherwise undefined.
 */
export const validateSessionId = (req, res, next) => {
	const candidate = (req.params.sessionId || '').trim()
	const isWellFormed = /^[a-zA-Z0-9_-]+$/.test(candidate)

	if (isWellFormed) {
		req.sessionId = candidate
		next()
		return
	}

	return res.status(400).json({ error: 'Invalid Session ID.' })
}
</file>
<file path="./index.js">
import express from 'express'
import commandsRoutes from './routes/commands'
import updateRoutes from './routes/update'
import sessionsRoutes from './routes/sessions'
import { redisClient } from './utils/redisClient'
import { port } from './config/config'

// Build the HTTP application: JSON body parsing plus the three route modules imported above.
// NOTE(review): routes/password.js exists in this project but is not mounted here — confirm whether that is intentional.
const app = express()

app.use(express.json())
app.use('/commands', commandsRoutes)
app.use('/update', updateRoutes)
app.use('/sessions', sessionsRoutes)

// Keep the server handle so the SIGINT handler below can close it.
const server = app.listen(port, () => {
console.log(`Server is listening on port ${port}`)
})

// Shutdown on SIGINT: the Redis client is quit first, then the HTTP server is closed.
// The process exits with 0 from the close callback, or with 1 if quitting Redis throws.
// NOTE(review): Redis is closed before the server stops accepting requests — confirm in-flight requests cannot still need it.
process.on('SIGINT', async () => {
console.log('Shutting down server...')
try {
await redisClient.quit()
server.close(() => {
console.log('Server closed.')
process.exit(0)
})
} catch (err) {
console.error('Error during shutdown:', err)
process.exit(1)
}
})
</file>
<file path="./bot/handlers.js">
import { bot } from './instance'
import { parseSessionId, withUserSession, startSession } from '../utils/session'

/**
 * Handles `/start <session_id>`: binds the user and chat to the session and greets them.
 *
 * @param {number|string} chatId - Chat that receives the reply.
 * @param {number|string} userId - User to attach to the session.
 * @param {string} text - Full message text, including the command.
 * @returns {Promise<*>} The prompt message's send result when no id was given, otherwise undefined.
 */
const start = async (chatId, userId, text) => {
	const sessionId = parseSessionId(text)

	if (sessionId) {
		await startSession(sessionId, userId, chatId)
		await bot.sendMessage(chatId, `Welcome to session ${sessionId}!`)
		return
	}

	return bot.sendMessage(chatId, 'Please provide a session ID.')
}

/**
 * Handles `/clear`: queues a CLEAR command and resets the prompt counter to 2.
 *
 * @param {number|string} chatId - Chat that receives the confirmation.
 * @param {number|string} userId - User whose active session is updated.
 * @returns {Promise<*>} Whatever `withUserSession` resolves to.
 */
const clear = (chatId, userId) => {
	const resetSession = (session) => {
		session.promptNumber = 2
		session.commands.push({ type: 'CLEAR' })
		return bot.sendMessage(chatId, 'Session cleared.')
	}

	return withUserSession(chatId, userId, resetSession)
}

/**
 * Handles a plain message: queues it as a RUN command tagged with the session's
 * current prompt number, then advances that number by 2.
 *
 * @param {number|string} chatId - Chat that receives the acknowledgement.
 * @param {number|string} userId - User whose active session is updated.
 * @param {string} text - Message text to run.
 * @returns {Promise<*>} Whatever `withUserSession` resolves to.
 */
const request = (chatId, userId, text) => {
	const enqueueRun = (session) => {
		const promptNumber = session.promptNumber
		session.commands.push({ type: 'RUN', text, promptNumber })
		session.promptNumber = promptNumber + 2
		return bot.sendMessage(chatId, 'Processing your request...')
	}

	return withUserSession(chatId, userId, enqueueRun)
}

/**
 * Entry point for every incoming Telegram message: dispatches `/start`,
 * `/clear`, or treats the text as a request.
 *
 * Never rejects: handler failures are logged and reported to the chat, and a
 * failure to deliver that report is logged as well instead of being left as a
 * floating promise.
 *
 * @param {object} msg - Telegram message; reads `chat.id`, `from.id` and `text`.
 * @returns {Promise<void>}
 */
export const handleMessage = async (msg) => {
	const chatId = msg.chat.id
	const userId = msg.from?.id
	const text = msg.text?.trim() || ''

	// Some messages carry no sender; without a user id there is no session to act on.
	if (userId === undefined) return

	try {
		if (text.startsWith('/start')) await start(chatId, userId, text)
		else if (text.startsWith('/clear')) await clear(chatId, userId)
		else await request(chatId, userId, text)
	} catch (error) {
		console.error('Error handling message:', error)
		try {
			await bot.sendMessage(chatId, 'An error occurred.')
		} catch (notifyError) {
			// The original failure is often a Telegram outage, so this send can fail too.
			console.error('Error sending failure notice:', notifyError)
		}
	}
}
</file>
<file path="./bot/instance.js">
import TelegramBot from 'node-telegram-bot-api'
import { botToken } from '../config/config'
import { handleMessage } from './handlers'

// Shared bot instance for the whole process, constructed with `polling: true`.
export const bot = new TelegramBot(botToken, { polling: true })

// Route every 'message' event to the dispatcher imported from ./handlers.
// NOTE(review): ./handlers imports `bot` from this module while this module imports ./handlers (circular import);
// this works only as long as `bot` is dereferenced at call time — confirm load order stays that way.
bot.on('message', handleMessage)
</file>
<file path="./controllers/bot.js">
import { withExistingSession } from '../utils/session'
import { bot } from '../bot/instance'
import { splitMessage } from '../utils/splitMessage'

/**
 * Drains the queued results of an existing session, sending each one through
 * `processMessages` in FIFO order.
 *
 * `session.processing` is set while draining and always cleared afterwards.
 * Errors raised while draining are logged, not rethrown.
 *
 * @param {string} sessionId - Id of the session to process.
 * @returns {Promise<void>}
 */
export const processSession = async (sessionId) => {
	const drainResults = async (session) => {
		session.processing = true
		try {
			while (session.results.length !== 0) {
				await processMessages(session, session.results.shift())
			}
		} catch (error) {
			console.error(`Error processing session ${sessionId}:`, error)
		} finally {
			session.processing = false
		}
	}

	await withExistingSession(sessionId, drainResults)
}

/**
 * Sends `text` to the session's chat as one or more MarkdownV2 messages and
 * appends each sent message to `session.messages` (created on demand).
 *
 * Does nothing when the session has no `chatId`. Send failures are logged and
 * stop the remaining chunks; they are not rethrown.
 *
 * @param {object} session - Session object; reads `chatId`, mutates `messages`.
 * @param {string} text - Raw text, split by `splitMessage`.
 * @returns {Promise<void>}
 */
export const processMessages = async (session, text) => {
	const { chatId } = session
	if (!chatId) return

	session.messages = session.messages || []

	try {
		const chunks = splitMessage(text)
		for (const chunk of chunks) {
			const sent = await bot.sendMessage(session.chatId, chunk, { parse_mode: 'MarkdownV2' })
			session.messages.push(sent)
		}
	} catch (error) {
		// NOTE(review): createSession does not set `sessionId`, so this may log "undefined" — confirm.
		console.error(`Error sending messages for session ${session.sessionId}:`, error)
	}
}
</file>
<file path="./routes/sessions.js">
import express from 'express'
import { redisClient } from '../utils/redisClient'

const router = express.Router()

/**
 * GET / — responds with `{ sessions: [...] }`, the ids of all sessions stored in Redis.
 *
 * Keys are enumerated with SCAN rather than KEYS: KEYS walks the whole keyspace
 * in one blocking call, while SCAN iterates incrementally. SCAN may report a key
 * more than once, so ids are de-duplicated before responding.
 * Responds 500 with a generic error body if Redis fails.
 */
router.get('/', async (req, res) => {
	const keyPrefix = 'session:'

	try {
		const sessionIds = new Set()
		for await (const key of redisClient.scanIterator({ MATCH: `${keyPrefix}*` })) {
			sessionIds.add(key.slice(keyPrefix.length))
		}
		res.json({ sessions: [...sessionIds] })
	} catch (error) {
		console.error('Error fetching sessions:', error)
		res.status(500).json({ error: 'Internal Server Error' })
	}
})

export default router
</file>
<file path="./routes/commands.js">
import express from 'express'
import { validateSessionId } from '../middleware/validateSessionId'
import { withExistingOrNewSession } from '../utils/session'

const router = express.Router()

/**
 * GET /:sessionId — returns the session's queued commands and empties the queue.
 *
 * The response is sent only after the emptied queue has been persisted, so a
 * persistence failure yields a single 500 instead of a second response after
 * the commands were already delivered.
 */
router.get('/:sessionId', validateSessionId, async (req, res) => {
	const { sessionId } = req

	try {
		let commands = []
		await withExistingOrNewSession(sessionId, (session) => {
			commands = session.commands
			session.commands = []
		})
		res.json({ commands })
	} catch (error) {
		console.error(`Error fetching commands for session ${sessionId}:`, error)
		res.status(500).json({ error: 'Internal Server Error' })
	}
})

/**
 * POST /:sessionId — appends a command (the JSON body) to the session's queue.
 *
 * A body without a truthy `type` is rejected with 400 before the session is
 * touched, so invalid requests no longer create or rewrite a session.
 * `CLEAR` resets the prompt counter to 2; any other command is stamped with the
 * current prompt number, which then advances by 2.
 * The success response is sent only after the session has been persisted.
 */
router.post('/:sessionId', validateSessionId, async (req, res) => {
	const { sessionId } = req
	const command = req.body

	if (!command || !command.type) {
		return res.status(400).json({ error: 'Invalid command.' })
	}

	try {
		await withExistingOrNewSession(sessionId, (session) => {
			if (command.type === 'CLEAR') {
				session.promptNumber = 2
			} else {
				command.promptNumber = session.promptNumber
				session.promptNumber += 2
			}

			session.commands.push(command)
		})
		res.json({ message: 'Command added.' })
	} catch (error) {
		console.error(`Error adding command to session ${sessionId}:`, error)
		res.status(500).json({ error: 'Internal Server Error' })
	}
})

export default router
</file>
<file path="./routes/password.js">
import express from 'express'
import validator from 'validator'
import bcrypt from 'bcrypt'
import { validateSessionId } from '../middleware/validateSessionId'
import { withExistingOrNewSession } from '../utils/session'
import { botUsername } from '../config/config'

const router = express.Router()

/**
 * POST /:sessionId — stores a bcrypt hash of the supplied password on the
 * session and responds with a Telegram deep link for the bot.
 *
 * Responds 400 when `password` is missing, empty, or not a string, and 500 on
 * any other failure.
 */
router.post('/:sessionId', validateSessionId, async (req, res) => {
	const { sessionId } = req

	try {
		const { password } = req.body ?? {}

		// validator's helpers throw a TypeError on non-string input, which used to
		// surface as a 500; reject such bodies as a client error instead.
		if (typeof password !== 'string' || validator.isEmpty(password)) {
			return res.status(400).json({ error: 'Password is required.' })
		}

		const sanitizedPassword = validator.escape(password)

		await withExistingOrNewSession(sessionId, async (session) => {
			session.password = await bcrypt.hash(sanitizedPassword, 10)
		})

		// The escaped password can contain `&`, `;` or `#`, which would break the
		// query string unless the payload is percent-encoded.
		// NOTE(review): the link carries the password in clear text — confirm this is intended.
		const startPayload = encodeURIComponent(`${sessionId}-${sanitizedPassword}`)
		res.json({ link: `https://t.me/${botUsername}?start=${startPayload}` })
	} catch (error) {
		console.error(`Error setting password for session ${sessionId}:`, error)
		res.status(500).json({ error: 'Internal Server Error' })
	}
})

export default router
</file>
<file path="./routes/update.js">
import express from 'express'
import { validateSessionId } from '../middleware/validateSessionId'
import { getOrCreateSession, setSession } from '../utils/session'
import { bot } from '../bot/instance'

const router = express.Router()

/**
 * POST /:sessionId — records a result text on the session and, when the session
 * is bound to a chat, forwards the text to that chat.
 *
 * Responds 400 when `text` is missing from the body and 500 on any failure.
 */
router.post('/:sessionId', validateSessionId, async (req, res) => {
	const { sessionId } = req
	const { text: resultText } = req.body

	try {
		if (!resultText) {
			return res.status(400).json({ error: 'Result text is required.' })
		}

		const session = await getOrCreateSession(sessionId)
		session.results.push(resultText)
		await setSession(sessionId, session)

		const { chatId } = session
		if (chatId) {
			await bot.sendMessage(chatId, resultText)
		}

		res.json({ message: 'Result received.' })
	} catch (error) {
		console.error(`Error updating session ${sessionId}:`, error)
		res.status(500).json({ error: 'Internal Server Error' })
	}
})

export default router
</file>
<file path="./utils/redisClient.js">
import { createClient } from 'redis'
import { redisUrl } from '../config/config'

// Shared Redis client for the whole process.
export const redisClient = createClient({ url: redisUrl })

redisClient.on('error', (err) => console.error('Redis error:', err))

// Connect at import time; a failed connection terminates the process with exit code 1.
const connectOrExit = async () => {
	try {
		await redisClient.connect()
		console.log('Connected to Redis.')
	} catch (error) {
		console.error('Failed to connect to Redis:', error)
		process.exit(1)
	}
}

connectOrExit()
</file>
<file path="./utils/markdownV2Text.js">
import { decode } from "html-entities";

/**
 * Strips one inline markup delimiter pair from `node.value` and records where
 * the enclosed text sits as `{ begin, end }` ranges in `node.formatting[type]`.
 *
 * Mutates `node` in place: `node.value` loses the delimiters, and
 * `node.formatting` / `node.formatting[type]` are created on demand.
 *
 * @param {object} node - Node with a string `value`; `formatting` is optional.
 * @param {string} type - Key under `node.formatting` that collects the ranges.
 * @param {string} symbol_left - Opening delimiter, matched literally.
 * @param {string} [symbol_right=symbol_left] - Closing delimiter, matched literally.
 */
function parseFormatting(node, type, symbol_left, symbol_right = symbol_left) {
if (!node.formatting) node.formatting = {}
if (!node.formatting[type]) node.formatting[type] = []

// Escape regex metacharacters so both delimiters are matched literally.
const escaped_left = symbol_left.replace(/[.*+?^${}()|\[\]\\]/g, '\\$&')
const escaped_right = symbol_right.replace(/[.*+?^${}()|\[\]\\]/g, '\\$&')

// Pass 1: for every closed pair, shift the ranges already recorded (by earlier
// calls) to account for the delimiter characters that pass 2 will remove.
// The enclosed text must start and end with non-whitespace, so it is at least
// two characters long; `.` does not cross line breaks.
for (const match of node.value.matchAll(new RegExp(`${escaped_left}\\S.*?\\S${escaped_right}`, 'g'))) {
const left = match.index
const right = match.index + match[0].length - symbol_right.length
fixFormattingRanges(node.formatting, -1, left, symbol_left.length)
fixFormattingRanges(node.formatting, -1, right, symbol_right.length)
}

// Number of delimiter characters removed so far in the current replace pass;
// converts a match offset in the old string into an offset in the new one.
let removedLength = 0

// Replace callback: records the range of the captured text and keeps only that text.
const handle = (match, captured, index) => {
const begin = index - removedLength
const end = begin + captured.length
node.formatting[type].push({ begin, end })

removedLength += match.length - captured.length
return captured
}

// Pass 2: remove the delimiters of every closed pair.
node.value = node.value.replace(new RegExp(`${escaped_left}(\\S.*?\\S)${escaped_right}`, 'g'), handle)

removedLength = 0

// Pass 3: an opener with no closer. The pattern has no `g` flag, so only the
// first such opener is handled; the range runs to the end of the string.
// Ranges recorded earlier are not shifted for this removal.
node.value = node.value.replace(new RegExp(`${escaped_left}(\\S.*)$`), handle)
}
// Passes that parseText runs, in this order, over a text node.
// HTML entities are decoded first; each later pass strips one delimiter style
// and records its ranges. The two-character delimiters (`**`, `~~`) are listed
// before their one-character counterparts — presumably so the longer form is
// consumed first (TODO confirm).
const formatting = [
(node) => node.value = decode(node.value),
(node) => parseFormatting(node, 'monospace', '`'),
(node) => parseFormatting(node, 'bold', '**'),
(node) => parseFormatting(node, 'italic', '*'),
(node) => parseFormatting(node, 'italic', '_'),
(node) => parseFormatting(node, 'strikethrough', '~~'),
(node) => parseFormatting(node, 'strikethrough', '~'),
(node) => parseFormatting(node, 'underline', '<u>', '</u>'),
(node) => parseFormatting(node, 'spoiler', '<spoiler>', '</spoiler>')
]

/**
 * Visits every recorded range of every formatting type.
 *
 * @param {Object<string, Array<{begin: number, end: number}>>} formatting - Ranges keyed by type; a missing map is treated as empty.
 * @param {(range: object, type: string) => void} callback - Invoked once per range.
 */
function iterateFormatting(formatting, callback) {
	for (const kind in formatting) {
		for (const range of formatting[kind]) {
			callback(range, kind)
		}
	}
}

/**
 * Shifts recorded ranges by `diff`, repeated `count` times, relative to `index`.
 *
 * On each repetition a range's `begin` moves when it lies strictly after
 * `index`, and its `end` moves when it lies at or after `index`. Every
 * repetition compares against `index` itself, not against an advancing position.
 *
 * @param {object} formatting - Ranges keyed by formatting type; mutated in place.
 * @param {number} diff - Amount added per repetition (may be negative).
 * @param {number} index - Reference position in the text.
 * @param {number} [count=1] - Number of repetitions.
 */
function fixFormattingRanges(formatting, diff, index, count = 1) {
	const shiftRange = (range) => {
		if (range.begin > index) range.begin += diff
		if (range.end >= index) range.end += diff
	}

	for (let pass = 0; pass < count; pass++) {
		iterateFormatting(formatting, shiftRange)
	}
}

/**
 * Variant of fixFormattingRanges whose `end` comparison is strict: a range's
 * `end` moves only when it lies strictly after `index`.
 *
 * @param {object} formatting - Ranges keyed by formatting type; mutated in place.
 * @param {number} diff - Amount added per repetition (may be negative).
 * @param {number} index - Reference position in the text.
 * @param {number} [count=1] - Number of repetitions.
 */
function fixFormattingRangesV2(formatting, diff, index, count = 1) {
	const shiftRange = (range) => {
		if (range.begin > index) range.begin += diff
		if (range.end > index) range.end += diff
	}

	for (let pass = 0; pass < count; pass++) {
		iterateFormatting(formatting, shiftRange)
	}
}

/**
 * Maps a formatting type to the MarkdownV2 delimiter that markdownV2Text
 * inserts around the formatted range.
 *
 * @param {string} type - One of bold, italic, monospace, strikethrough, underline, spoiler.
 * @returns {string|undefined} The delimiter, or undefined for any other type.
 */
function symbolOfFormattingType(type) {
	const symbols = new Map([
		['bold', '*'],
		['italic', '_'],
		['monospace', '`'],
		['strikethrough', '~'],
		['underline', '__'],
		['spoiler', '||'],
	])

	return symbols.get(type)
}

/**
 * Backslash-escapes every MarkdownV2 reserved character in `node.value` and
 * keeps the recorded formatting ranges aligned with the longer text.
 *
 * @param {object} node - Node with `value` and `formatting`; mutated in place.
 */
function escapeMarkdownV2(node) {
	// Backslashes inserted so far; replace() reports offsets in the original string.
	let inserted = 0

	const escapeChar = (char, offset) => {
		fixFormattingRangesV2(node.formatting, +1, offset + inserted)
		inserted += 1
		return `\\${char}`
	}

	node.value = node.value.replace(/[\\\[\]\-(){}+_*~`>#=|!.]/g, escapeChar)
}

/**
 * Runs every pass listed in `formatting`, in order, over the node.
 *
 * @param {object} node - Text node; mutated in place by the passes.
 * @returns {object} The same node.
 */
function parseText(node) {
	formatting.forEach((applyPass) => {
		applyPass(node)
	})
	return node
}

/**
 * Renders one text node as Telegram MarkdownV2.
 *
 * Steps: strip inline markup and record ranges (parseText), escape reserved
 * characters, re-insert MarkdownV2 delimiters at the recorded ranges, then
 * restore `[label](url)` / `![label](url)` constructs that escaping turned
 * into `\[label\]\(url\)`.
 *
 * The input node is not modified: its `formatting` ranges are copied, because
 * the steps above shift ranges in place and callers may render the same node
 * more than once.
 *
 * @param {object} input_node - Text node with `value` and optional `formatting`.
 * @returns {string} MarkdownV2 text.
 */
export function markdownV2Text(input_node) {
	const node = { ...input_node }

	if (input_node.formatting) {
		node.formatting = {}
		for (const type in input_node.formatting) {
			node.formatting[type] = input_node.formatting[type].map((range) => ({ ...range }))
		}
	}

	parseText(node)
	escapeMarkdownV2(node)
	let text = node.value

	function insert(i, symbol) {
		fixFormattingRanges(node.formatting, +symbol.length, i)
		text = text.substring(0, i) + symbol + text.substring(i)
	}

	iterateFormatting(node.formatting, (f, type) => {
		insert(f.begin, symbolOfFormattingType(type))
		insert(f.end, symbolOfFormattingType(type))
	})

	// The patterns match the escaped parentheses `\(` and `\)` around the target.
	// The previous patterns had `$` in those positions and could never match.
	text = text.replace(
		/\\!\\\[(\S|\S.*?\S)\\]\\\((\S|\S.*?\S)\\\)/g,
		(match, label, target) => `![${label}](${target})`
	)

	text = text.replace(
		/\\\[(\S|\S.*?\S)\\]\\\((\S|\S.*?\S)\\\)/g,
		(match, label, target) => `[${label}](${target})`
	)

	return text
}
</file>
<file path="./utils/markdownV2.js">
import { markdownV2Text } from "./markdownV2Text"

/**
 * Splits a text node around every match of `regex`.
 *
 * Each match becomes a node of `type` whose value is `parse(firstCaptureGroup)`;
 * the text between matches is kept as text nodes. Non-text nodes are returned
 * unchanged inside a one-element array.
 *
 * @param {object} node - Node to split.
 * @param {RegExp} regex - Pattern passed to `matchAll`.
 * @param {string} type - Type assigned to nodes created from matches.
 * @param {(text: string) => *} [parse] - Converts capture group 1 into the node value.
 * @returns {object[]} Resulting nodes in document order.
 */
function parser(node, regex, type, parse = (text) => {return text}) {
	if (node.type !== 'text') return [node]

	const source = node.value
	const pieces = []
	let cursor = 0

	for (const hit of source.matchAll(regex)) {
		const parsed = parse(hit[1])

		if (hit.index > cursor) {
			pieces.push({ type: 'text', value: source.slice(cursor, hit.index) })
		}
		pieces.push({ type, value: parsed })

		cursor = hit.index + hit[0].length
	}

	if (cursor < source.length) {
		pieces.push({ type: 'text', value: source.slice(cursor) })
	}

	return pieces
}

// Block-level splitters, applied in this order by parse(). Each one calls
// parser(), which splits text nodes and passes other node types through.
const parsers = [
// Fenced block: ``` at the start of a line through ``` at the end of a line.
// The value is everything after the opening fence up to and including the last
// newline before the closing fence (the `s` flag lets `.` span lines).
(node) => parser(node, /^```(.*?\n)```$/gms, 'code'),
// Same shape, delimited by single backticks.
(node) => parser(node, /^`(.*?\n)`$/gms, 'code'),
// Table: a run of lines that begin with `|`. Each row is split on `|` and its
// cells are trimmed; a trailing empty cell is dropped.
(node) => parser(node, /(((^\|.*)+\n?)+)/gm, 'table', (text) => {
const result = text.trim().split('\n').map(row => {
const cells = row.split('|').slice(1)
if (cells[cells.length - 1].trim() === '') {
cells.pop()
}
return cells.map(cell => cell.trim())
})
// The second row is removed unconditionally — presumably the `|---|`
// separator row (TODO confirm for tables that lack one).
result.splice(1, 1)
return result
}),
// Quote: a line starting with `>`; the rest of the line becomes one text node.
(node) => parser(node, /^>(.*)/gm, 'quote', (text) => [{
type: 'text',
value: text,
}]),
// Header: 1-6 `#` followed by a space. `**` markers are stripped and the
// header is marked bold.
// NOTE(review): `end` uses the length of the original text, which can exceed
// the length of the cleaned `value` — confirm this is relied upon downstream.
(node) => parser(node, /^(#{1,6} .*)/gm, 'header', (text) => [{
type: 'text',
value: text.trim()
.replace(/\*\*(\S.*?\S)\*\*/g, (match, captured) => captured)
.replace(/\*\*(\S.*)$/g, (match, captured) => captured),
formatting: {
bold: [{ begin: 0, end: text.length }]
},
}]),
]

/**
 * Prefixes every MarkdownV2 reserved character in `text` with a backslash.
 *
 * @param {string} text - Raw text.
 * @returns {string} Escaped text.
 */
function escape(text) {
	const reserved = /[\\\[\]\-(){}+_*~`>#=|!.]/g
	// `$&` stands for the matched character.
	return text.replace(reserved, '\\$&')
}

/**
 * Renders parsed nodes as a single MarkdownV2 string.
 *
 * - text: rendered by markdownV2Text, one per line
 * - code: escaped content wrapped in ``` fences
 * - table: re-drawn as an aligned text table inside a ```markdown fence
 * - quote / header: their child nodes rendered recursively
 *
 * One trailing newline is removed from the result.
 *
 * @param {object[]} nodes - Nodes produced by parse().
 * @returns {string} MarkdownV2 text.
 */
export function markdownV2(nodes) {
	let output = ''
	for (const node of nodes) {
		switch (node.type) {
			case 'text':
				output += markdownV2Text(node) + '\n'
				break
			case 'code':
				// The block is appended whole. The previous chunking loop used an
				// undefined constant and an undefined function, so every code node
				// threw a ReferenceError.
				output += '```' + escape(node.value) + '```'
				break
			case 'table': {
				output += '```markdown\n'

				const rows = node.value
				// Width of each column: the widest cell in that column.
				const maxLengths = rows[0].map((header, i) =>
					Math.max(header.length, ...rows.slice(1).map(row => row[i] ? row[i].length : 0))
				)

				for (let i = 0; i < rows.length; i++) {
					const row = rows[i]
					output += escape('| ' + row.map((cell, j) => cell.padEnd(maxLengths[j])).join(' | ') + ' |\n')
					if (i === 0) {
						// Separator line under the header row.
						output += escape('| ' + maxLengths.map(length => '-'.repeat(length)).join(' | ') + ' |\n')
					}
				}

				output += '```\n'
				break
			}
			case 'quote':
				output += '>' + markdownV2(node.value)
				break
			case 'header':
				output += markdownV2(node.value)
				break
		}
	}
	if (output[output.length - 1] === '\n') {
		output = output.slice(0, -1)
	}
	return output
}

/**
 * Parses raw text into block-level nodes by running every entry of `parsers`,
 * in order, over the nodes produced by the previous entry.
 *
 * @param {string} text - Raw input text.
 * @returns {object[]} Nodes in document order.
 */
export function parse(text) {
	const initialNodes = [{type: 'text', value: text}]

	return parsers.reduce(
		(nodes, splitNode) => nodes.flatMap((node) => splitNode(node)),
		initialNodes
	)
}
</file>
<file path="./utils/splitMessage.js">
import { parse, markdownV2 } from './markdownV2'

/**
 * Converts raw text to MarkdownV2 and splits it into parts of at most 4096
 * characters, preferring to break at a newline.
 *
 * - Blank parts are never emitted.
 * - When no usable newline exists, the text is cut at the limit. The cut moves
 *   back one character if it would separate an escaping backslash from the
 *   character it escapes.
 *
 * @param {string} text - Raw input text.
 * @returns {string[]} Message parts in order; empty when there is nothing to send.
 */
export const splitMessage = (text) => {
	const maxLength = 4096
	const formattedText = markdownV2(parse(text))
	const messages = []

	let remainingText = formattedText

	while (remainingText.length > 0) {
		if (remainingText.length <= maxLength) {
			if (remainingText.trim() !== '') messages.push(remainingText)
			break
		}

		let splitIndex = remainingText.lastIndexOf('\n', maxLength)
		if (splitIndex <= 0) {
			// No newline to break at (index 0 would produce an empty part).
			splitIndex = maxLength

			// An odd run of backslashes before the cut means the last one escapes
			// the character after the cut; keep the pair together.
			let backslashes = 0
			while (backslashes < splitIndex && remainingText[splitIndex - 1 - backslashes] === '\\') {
				backslashes++
			}
			if (backslashes % 2 === 1) splitIndex--
		}

		const messagePart = remainingText.slice(0, splitIndex).trim()
		if (messagePart !== '') messages.push(messagePart)
		remainingText = remainingText.slice(splitIndex).trim()
	}
	return messages
}
</file>
<file path="./utils/session.js">
import { redisClient } from './redisClient'
import { bot } from '../bot/instance'

// Lifetime of session and user-session keys, in seconds (24 hours).
const SESSION_TTL = 24 * 60 * 60

/** Builds the Redis key that stores a session. */
export const getSessionKey = (sessionId) => 'session:'.concat(sessionId)

/** Builds the Redis key that stores a user's active session id. */
export const getUserSessionKey = (userId) => 'userSession:'.concat(userId)

/**
 * Loads a session from Redis.
 *
 * @param {string} sessionId - Session id.
 * @returns {Promise<object|null>} The parsed session, or null when nothing is stored.
 */
export const getSession = async (sessionId) => {
	const raw = await redisClient.get(getSessionKey(sessionId))
	if (!raw) return null
	return JSON.parse(raw)
}

/**
 * Stores a session as JSON and (re)starts its TTL.
 *
 * @param {string} sessionId - Session id.
 * @param {object} session - Session object to serialize.
 * @returns {Promise<void>}
 */
export const setSession = async (sessionId, session) => {
	const key = getSessionKey(sessionId)
	await redisClient.setEx(key, SESSION_TTL, JSON.stringify(session))
}

/**
 * Looks up the id of the session a user is currently attached to.
 *
 * @param {number|string} userId - User id.
 * @returns {Promise<string|null>} Whatever the Redis `get` resolves to for the user's key.
 */
export const getUserSession = async (userId) => {
	const key = getUserSessionKey(userId)
	return redisClient.get(key)
}

/**
 * Attaches a user to a session and (re)starts the TTL of that link.
 *
 * @param {number|string} userId - User id.
 * @param {string} sessionId - Session id to store.
 * @returns {Promise<*>} The promise returned by the Redis `setEx` call.
 */
export const setUserSession = (userId, sessionId) => {
	const key = getUserSessionKey(userId)
	return redisClient.setEx(key, SESSION_TTL, sessionId)
}

/**
 * Creates a fresh session, persists it, and returns it.
 *
 * A new session has empty command and result queues, prompt number 2, no user
 * or chat attached, and is not marked as processing.
 *
 * @param {string} sessionId - Id under which the session is stored.
 * @returns {Promise<object>} The newly created session object.
 */
export const createSession = async (sessionId) => {
	const freshSession = {
		commands: [],
		results: [],
		promptNumber: 2,
		userId: null,
		chatId: null,
		processing: false,
	}

	await setSession(sessionId, freshSession)

	return freshSession
}

/**
 * Extracts the session id from a command such as `/start <session_id>`.
 *
 * The text is split on runs of whitespace, so extra spaces, tabs or newlines
 * between the command and the id are tolerated. Splitting on a single space
 * returned an empty id for `/start  abc`.
 *
 * @param {string} text - Full message text, including the command.
 * @returns {string|null} The second whitespace-separated token, or null when absent.
 */
export const parseSessionId = (text) => {
	const parts = text.trim().split(/\s+/)
	return parts.length > 1 ? parts[1] : null
}

/**
 * Returns the stored session, creating and persisting a fresh one when none exists.
 *
 * @param {string} sessionId - Session id.
 * @returns {Promise<object>} The existing or newly created session.
 */
export const getOrCreateSession = async (sessionId) => {
	const existing = await getSession(sessionId)
	if (existing) return existing

	return createSession(sessionId)
}

/**
 * Runs `update` against the session the user is attached to, then persists the session.
 *
 * When the user has no active session, or the session no longer exists, the
 * chat is told so and `update` is not called.
 *
 * @param {number|string} chatId - Chat that receives any notice.
 * @param {number|string} userId - User whose active session is looked up.
 * @param {(session: object) => *} update - Callback that may mutate the session.
 * @returns {Promise<*>} The callback's result, or null when no session was available.
 */
export const withUserSession = async (chatId, userId, update) => {
	const sessionId = await getUserSession(userId)
	const session = sessionId ? await getSession(sessionId) : null

	if (!session) {
		const notice = sessionId
			? 'Session not found.'
			: 'No active session. Use /start <session_id> to start.'
		await bot.sendMessage(chatId, notice)
		return null
	}

	const outcome = await update(session)
	await setSession(sessionId, session)
	return outcome
}

/**
 * Runs `update` against the session (created if missing), then persists it.
 *
 * @param {string} sessionId - Session id.
 * @param {(session: object) => *} update - Callback that may mutate the session; awaited.
 * @returns {Promise<void>}
 */
export const withExistingOrNewSession = async (sessionId, update) => {
	const session = await getOrCreateSession(sessionId)

	await update(session)

	await setSession(sessionId, session)
}

/**
 * Runs `update` against an existing session and persists it afterwards.
 *
 * @param {string} sessionId - Session id.
 * @param {(session: object) => *} update - Callback that may mutate the session; awaited.
 * @returns {Promise<boolean>} False when the session does not exist (callback not run), otherwise true.
 */
export const withExistingSession = async (sessionId, update) => {
	const session = await getSession(sessionId)

	if (session) {
		await update(session)
		await setSession(sessionId, session)
		return true
	}

	return false
}

/**
 * Binds a user and chat to a session (created if missing) and records the
 * session as the user's active one.
 *
 * @param {string} sessionId - Session id.
 * @param {number|string} userId - User to attach.
 * @param {number|string} chatId - Chat to attach.
 * @returns {Promise<void>}
 */
export const startSession = async (sessionId, userId, chatId) => {
	const session = await getOrCreateSession(sessionId)

	Object.assign(session, { userId, chatId })

	await setSession(sessionId, session)
	await setUserSession(userId, sessionId)
}
</file>
<file path="./FORMATTING.md">
Omit all semicolons (";")
Omit ".js" extensions in import paths
Indent with tabs, not 2-4 spaces
</file>
<file path="./INSTRUCTIONS.md">
Your task is to rewrite the code of the files utils/markdownV2.js and utils/markdownV2Text.js. This is a **complex task** that requires careful consideration of various edge cases.

You should do this in a separate function that converts `nodes` into split groups of `nodes` (one group per message). Of course, you also need to rewrite the markdownV2/markdownV2Text functions to use it.

**Here's how you should handle the splitting:**

* **Prioritize splitting by lines.** It's preferable to keep lines intact whenever possible.
* **Respect the 4096 character limit.** If adding more content to the current message would exceed the limit, create a new message and continue there. The previous message should remain as it is.
* **Apply the same logic to tables and code blocks.** Treat them as separate entities when splitting.
* **Handle cases where a table, code block, or even a single line of text exceeds 4096 characters.**
* If an element is too large to fit in a single message, truncate it to 4096 characters.
* For lines, try to split by words. If a single word is longer than 4096 characters, truncate it and continue the rest in subsequent messages.
* **Splitting tables:** Divide tables into parts by rows. If a row exceeds 4096 characters, you can split it by words. If a word within a row is longer than 4096 characters, truncate it as described for text.
* **Splitting code blocks:** Apply the same splitting logic as for tables.
</file>

Thought

Taking a closer look

I’m examining the codebase to rewrite markdownV2.js and markdownV2Text.js, ensuring careful handling of edge cases. The goal is to split nodes, manually ensure line integrity, and respect the 4096 character limit throughout.

Implementing file limitations

Handling large files by truncating or splitting them, adhering to a maximum character limit of 4096 per message. This approach ensures efficient management of file processing.

Revisiting markdown functions

Enhancing markdown parsing

I’m adding functionality to parse and format markdown text, including code blocks, tables, quotes, and headers, while trimming whitespace and handling special characters.

Piecing together the task

I’m transforming code in utils/markdownV2.js and utils/markdownV2Text.js. A new function will split nodes into separate messages, adhering to the 4096-character limit and managing code blocks and tables efficiently.

Crafting message breaks

I'm thinking about how to implement a function to manage nodes, ensuring each message stays under 4096 characters by employing the 'splitMessage' logic effectively.

Formulating the function

OK, let me see. The splitMessage function parses and formats text, then chunks it into 4096-character messages, favoring line breaks. I’m thinking of refining this to better handle code blocks, tables, and textual nodes.

Crafting the splitter

I'm working on a specialized function to split nodes into 4096-character chunks, ensuring precision within code blocks, tables, and text, while adhering to the instructions.

Implementing the split

I’m focusing on creating a function to manage nodes into messages, splitting text and code blocks to ensure formatting fits within the character limit. This involves tracking current message length and applying specific splitting rules.

Implementing splitNodes and adjusting markdownV2 and markdownV2Text.

Defining the function

I’m thinking through 'splitNodes' and considering how to structure it effectively. Progressing step-by-step to ensure comprehension and functionality.

Breaking down nodes

I’m dissecting each node's content, estimating its length, and strategically cutting it into smaller, manageable pieces to keep each message node within the limit.

Breaking down function design

I’m thinking through the steps to implement the 'splitNodes' function in utils/markdownV2.js. This function will simplify splitting nodes for better clarity in the code.

Answer

<file path="./utils/markdownV2.js">
```javascript
import { markdownV2Text } from "./markdownV2Text"

/**
 * Splits a text node around every match of `regex`.
 *
 * Each match becomes a node of `type` whose value is `parse(firstCaptureGroup)`;
 * the text between matches is kept as text nodes. Non-text nodes are returned
 * unchanged inside a one-element array.
 *
 * @param {object} node - Node to split.
 * @param {RegExp} regex - Pattern passed to `matchAll`.
 * @param {string} type - Type assigned to nodes created from matches.
 * @param {(text: string) => *} [parse] - Converts capture group 1 into the node value.
 * @returns {object[]} Resulting nodes in document order.
 */
function parser(node, regex, type, parse = (text) => text) {
	if (node.type !== 'text') return [node]

	const source = node.value
	const pieces = []
	let cursor = 0

	for (const hit of source.matchAll(regex)) {
		const parsed = parse(hit[1])

		if (hit.index > cursor) {
			pieces.push({ type: 'text', value: source.slice(cursor, hit.index) })
		}
		pieces.push({ type, value: parsed })

		cursor = hit.index + hit[0].length
	}

	if (cursor < source.length) {
		pieces.push({ type: 'text', value: source.slice(cursor) })
	}

	return pieces
}

// Block-level splitters, applied in this order by parse(). Each one calls
// parser(), which splits text nodes and passes other node types through.
// The patterns below restore the backticks, pipes and asterisks that were lost
// when this code was rendered as markdown.
const parsers = [
	// Fenced block: ``` at the start of a line through ``` at the end of a line.
	(node) => parser(node, /^```(.*?\n)```$/gms, 'code'),
	// Same shape, delimited by single backticks.
	(node) => parser(node, /^`(.*?\n)`$/gms, 'code'),
	// Table: a run of lines that begin with `|`; cells are trimmed and the second
	// row is dropped (presumably the `|---|` separator — TODO confirm).
	(node) => parser(node, /(((^\|.*)+\n?)+)/gm, 'table', (text) => {
		const result = text.trim().split('\n').map(row => {
			const cells = row.split('|').slice(1)
			if (cells[cells.length - 1].trim() === '') {
				cells.pop()
			}
			return cells.map(cell => cell.trim())
		})
		result.splice(1, 1)
		return result
	}),
	// Quote: a line starting with `>`; the rest of the line becomes one text node.
	(node) => parser(node, /^>(.*)/gm, 'quote', (text) => [{
		type: 'text',
		value: text,
	}]),
	// Header: 1-6 `#` followed by a space; `**` markers are stripped and the
	// header is marked bold.
	(node) => parser(node, /^(#{1,6} .*)/gm, 'header', (text) => [{
		type: 'text',
		value: text.trim()
			.replace(/\*\*(\S.*?\S)\*\*/g, (match, captured) => captured)
			.replace(/\*\*(\S.*)$/g, (match, captured) => captured),
		formatting: {
			bold: [{ begin: 0, end: text.length }]
		},
	}]),
]

/**
 * Prefixes every MarkdownV2 reserved character in `text` with a backslash.
 *
 * The character class and the backslash literal are restored here; markdown
 * rendering had garbled them into an invalid expression.
 *
 * @param {string} text - Raw text.
 * @returns {string} Escaped text.
 */
function escape(text) {
	return text.replace(/[\\\[\]\-(){}+_*~`>#=|!.]/g, (match) => '\\' + match)
}

// Maximum length of one message, per the task's 4096-character limit.
const MAX_MESSAGE_LENGTH = 4096

/**
 * Renders parsed nodes as MarkdownV2, split into messages.
 *
 * Nodes are first grouped by splitNodes; each group is rendered and joined into
 * one string, with one trailing newline removed.
 *
 * NOTE(review): this returns an array of strings, not a single string — confirm
 * that every caller expects an array.
 *
 * @param {object[]} nodes - Nodes produced by parse().
 * @returns {string[]} One MarkdownV2 string per message.
 */
export function markdownV2(nodes) {
	const messageNodes = splitNodes(nodes)

	const messages = messageNodes.map(messageNodeArray => {
		let output = ''
		for (const node of messageNodeArray) {
			output += renderNode(node)
		}
		if (output[output.length - 1] === '\n') {
			output = output.slice(0, -1)
		}
		return output
	})

	return messages
}

/**
 * Renders a single node as MarkdownV2.
 *
 * Quote and header nodes render their child nodes recursively and join the
 * resulting messages with newlines.
 *
 * @param {object} node - Node with `type` and `value`.
 * @returns {string} Rendered text; an empty string for unknown node types.
 */
function renderNode(node) {
	switch (node.type) {
		case 'text':
			return markdownV2Text(node) + '\n'
		case 'code':
			return renderCodeNode(node)
		case 'table':
			return renderTableNode(node)
		case 'quote':
			return '>' + markdownV2(node.value).join('\n')
		case 'header':
			return markdownV2(node.value).join('\n')
		default:
			return ''
	}
}

/**
 * Renders a code node as a fenced MarkdownV2 block followed by a newline.
 *
 * The ``` fences are restored here; markdown rendering had stripped them from
 * the string literals, leaving the code without any fence.
 *
 * @param {object} node - Code node; `value` is the raw block content.
 * @returns {string} Fenced, escaped code block.
 */
function renderCodeNode(node) {
	const codeBlock = '```' + escape(node.value) + '```\n'
	return codeBlock
}

/**
 * Renders a table node as an aligned text table inside a ```markdown fence.
 *
 * The first row is treated as the header and is followed by a dashed separator.
 * A table with no rows renders as an empty string. Splitting can produce such a
 * node, and indexing its first row would throw.
 *
 * @param {object} node - Table node; `value` is an array of rows of cell strings.
 * @returns {string} Rendered table, or '' when there are no rows.
 */
function renderTableNode(node) {
	const rows = node.value
	if (rows.length === 0) return ''

	let output = '```markdown\n'

	// Width of each column: the widest cell in that column.
	const maxLengths = rows[0].map((header, i) =>
		Math.max(header.length, ...rows.slice(1).map(row => row[i] ? row[i].length : 0))
	)

	for (let i = 0; i < rows.length; i++) {
		const row = rows[i]
		output += escape('| ' + row.map((cell, j) => cell.padEnd(maxLengths[j])).join(' | ') + ' |\n')
		if (i === 0) {
			output += escape('| ' + maxLengths.map(length => '-'.repeat(length)).join(' | ') + ' |\n')
		}
	}

	output += '```\n'
	return output
}

/**
 * Parses raw text into block-level nodes by running every entry of `parsers`,
 * in order, over the nodes produced by the previous entry.
 *
 * @param {string} text - Raw input text.
 * @returns {object[]} Nodes in document order.
 */
export function parse(text) {
	let in_nodes = [{ type: 'text', value: text }]
	for (const fn of parsers) {
		const out_nodes = []
		for (let i = 0; i < in_nodes.length; i++) {
			out_nodes.push(...fn(in_nodes[i]))
		}
		in_nodes = out_nodes
	}
	return in_nodes
}

/**
 * Groups nodes into messages whose estimated rendered length stays within
 * MAX_MESSAGE_LENGTH.
 *
 * A node that does not fit into the current message is split by splitNode: the
 * `before` part closes the current message and the `after` part opens the next.
 *
 * NOTE(review): the `after` part is not split again, so a remainder longer than
 * the limit still yields an oversized message — confirm whether splitNode
 * callers guarantee a bounded remainder.
 *
 * @param {object[]} nodes - Nodes produced by parse().
 * @returns {object[][]} One array of nodes per message.
 */
function splitNodes(nodes) {
	const messages = []
	let currentMessageNodes = []
	let currentMessageLength = 0

	for (const node of nodes) {
		const nodeLength = estimateNodeLength(node)
		if (currentMessageLength + nodeLength <= MAX_MESSAGE_LENGTH) {
			currentMessageNodes.push(node)
			currentMessageLength += nodeLength
		} else {
			const splitResult = splitNode(node, MAX_MESSAGE_LENGTH - currentMessageLength)
			if (splitResult.before.length > 0) {
				currentMessageNodes.push(...splitResult.before)
			}
			if (currentMessageNodes.length > 0) {
				messages.push(currentMessageNodes)
				currentMessageNodes = []
				currentMessageLength = 0
			}
			if (splitResult.after.length > 0) {
				currentMessageNodes.push(...splitResult.after)
				currentMessageLength = estimateNodesLength(currentMessageNodes)
			}
		}
	}

	if (currentMessageNodes.length > 0) {
		messages.push(currentMessageNodes)
	}

	return messages
}

/**
 * Estimates how many characters a node occupies once rendered.
 *
 * @param {object} node - Node with `type` and `value`.
 * @returns {number} Estimated length; 0 for unknown node types.
 */
function estimateNodeLength(node) {
	switch (node.type) {
		case 'text':
			return estimateTextNodeLength(node)
		case 'code':
			return estimateCodeNodeLength(node)
		case 'table':
			return estimateTableNodeLength(node)
		case 'quote':
			// One extra character for the leading '>'.
			return 1 + estimateNodesLength(node.value)
		case 'header':
			return estimateNodesLength(node.value)
		default:
			return 0
	}
}

/** Rendered length of a text node plus its trailing newline. */
function estimateTextNodeLength(node) {
	const renderedText = markdownV2Text(node)
	return renderedText.length + 1
}

/**
 * Rendered length of a code node, including both ``` fences and the trailing
 * newline. The fences are restored here; markdown rendering had stripped them.
 */
function estimateCodeNodeLength(node) {
	const codeBlock = '```' + escape(node.value) + '```\n'
	return codeBlock.length
}

/** Rendered length of a table node. */
function estimateTableNodeLength(node) {
	const renderedTable = renderTableNode(node)
	return renderedTable.length
}

/** Sum of the estimated lengths of several nodes. */
function estimateNodesLength(nodes) {
	let length = 0
	for (const node of nodes) {
		length += estimateNodeLength(node)
	}
	return length
}

/**
 * Splits a node that does not fit into the space left in the current message.
 *
 * @param {object} node - Node to split.
 * @param {number} availableLength - Characters still free in the current message.
 * @returns {{before: object[], after: object[]}} Nodes for the current message and for the following ones.
 */
function splitNode(node, availableLength) {
	switch (node.type) {
		case 'text':
			return splitTextNode(node, availableLength)
		case 'code':
			return splitCodeNode(node, availableLength)
		case 'table':
			return splitTableNode(node, availableLength)
		case 'quote':
		case 'header':
			return splitWrapperNode(node)
		default:
			return { before: [node], after: [] }
	}
}

// Splits a quote/header by regrouping its children with splitNodes; the first
// group stays in the current message and each further group becomes its own node.
function splitWrapperNode(node) {
	const groups = splitNodes(node.value)

	// A wrapper without children has nothing to split; wrapping an undefined
	// first group would make rendering throw.
	if (groups.length === 0) return { before: [node], after: [] }

	const [first, ...rest] = groups.map((value) => ({ type: node.type, value }))
	return { before: [first], after: rest }
}

/**
 * Splits a text node so that its first part fits into `availableLength`.
 *
 * Whole lines are kept together while they fit. The first line that does not
 * fit is split by splitLine, and everything after it goes into the `after` node.
 * Empty parts are omitted rather than returned as empty text nodes, which would
 * otherwise render as stray blank lines or empty messages.
 *
 * @param {object} node - Text node to split.
 * @param {number} availableLength - Characters still free in the current message.
 * @returns {{before: object[], after: object[]}} Parts for the current and the following message.
 */
function splitTextNode(node, availableLength) {
	const text = node.value
	const formattedText = markdownV2Text(node)
	if (formattedText.length <= availableLength) {
		return { before: [node], after: [] }
	}

	const lines = text.split('\n')
	const beforeLines = []
	let currentLength = 0

	const toNodes = (parts) => parts.length > 0 ? [{ ...node, value: parts.join('\n') }] : []

	for (let i = 0; i < lines.length; i++) {
		const line = lines[i]
		const lineNode = { ...node, value: line }
		const lineLength = estimateTextNodeLength(lineNode)

		if (currentLength + lineLength <= availableLength) {
			beforeLines.push(line)
			currentLength += lineLength
			continue
		}

		const splitLineResult = splitLine(line, node, availableLength - currentLength)
		if (splitLineResult.before) {
			beforeLines.push(splitLineResult.before)
		}

		const afterLines = []
		if (splitLineResult.after) {
			afterLines.push(splitLineResult.after)
		}
		afterLines.push(...lines.slice(i + 1))

		return { before: toNodes(beforeLines), after: toNodes(afterLines) }
	}

	return { before: toNodes(beforeLines), after: [] }
}

/**
 * Splits one line at a word boundary so that the first part fits into
 * `availableLength`.
 *
 * When even the first word does not fit, that word is cut at the limit and the
 * remainder is carried over. A non-positive budget cuts at 0 instead of being
 * passed to `slice` as a negative offset.
 *
 * @param {string} line - Line to split.
 * @param {object} node - Text node the line belongs to; used as a template for length estimates.
 * @param {number} availableLength - Characters still free in the current message.
 * @returns {{before: string, after: string|null}} `after` is null when the whole line fits.
 */
function splitLine(line, node, availableLength) {
	const words = line.split(' ')
	const beforeWords = []
	let currentLength = 0

	for (let i = 0; i < words.length; i++) {
		const word = words[i]
		const wordNode = { ...node, value: word }
		const wordLength = estimateTextNodeLength(wordNode)

		if (currentLength + wordLength <= availableLength) {
			beforeWords.push(word)
			// +1 for the space that joins this word to the next.
			currentLength += wordLength + 1
			continue
		}

		if (currentLength === 0) {
			// Nothing fits yet: cut the word itself.
			const maxWordLength = Math.max(0, availableLength)
			const truncatedWord = word.slice(0, maxWordLength)
			const remainingWord = word.slice(maxWordLength)
			beforeWords.push(truncatedWord)
			return {
				before: beforeWords.join(' '),
				after: [remainingWord, ...words.slice(i + 1)].join(' '),
			}
		}

		return {
			before: beforeWords.join(' '),
			after: words.slice(i).join(' '),
		}
	}

	return { before: line, after: null }
}

/**
 * Splits a code node by lines so that its first part fits into `availableLength`.
 *
 * Line lengths are measured after escaping, because that is what gets sent. The
 * 6 reserved characters account for the opening and closing ``` fences. When no
 * line fits, `before` is empty rather than an empty fenced block.
 *
 * @param {object} node - Code node; `value` is the raw block content.
 * @param {number} availableLength - Characters still free in the current message.
 * @returns {{before: object[], after: object[]}} Parts for the current and the following message.
 */
function splitCodeNode(node, availableLength) {
	const codeLines = node.value.split('\n')
	const beforeLines = []
	let currentLength = 6

	const toNodes = (lines) => lines.length > 0 ? [{ type: 'code', value: lines.join('\n') }] : []

	for (let i = 0; i < codeLines.length; i++) {
		const line = codeLines[i]
		// +1 for the newline that joins this line to the next.
		const lineLength = escape(line).length + 1

		if (currentLength + lineLength > availableLength) {
			return { before: toNodes(beforeLines), after: toNodes(codeLines.slice(i)) }
		}

		beforeLines.push(line)
		currentLength += lineLength
	}

	return { before: toNodes(beforeLines), after: [] }
}

/**
 * Splits a table node by rows so that its first part fits into `availableLength`.
 *
 * Each row is measured as a one-row table. When no row fits, `before` is empty
 * rather than a table without rows.
 *
 * @param {object} node - Table node; `value` is an array of rows.
 * @param {number} availableLength - Characters still free in the current message.
 * @returns {{before: object[], after: object[]}} Parts for the current and the following message.
 */
function splitTableNode(node, availableLength) {
	const rows = node.value
	const beforeRows = []
	let currentLength = 0

	const toNodes = (part) => part.length > 0 ? [{ type: 'table', value: part }] : []

	for (let i = 0; i < rows.length; i++) {
		const row = rows[i]
		const rowNode = { ...node, value: [row] }
		const rowLength = estimateTableNodeLength(rowNode)

		if (currentLength + rowLength > availableLength) {
			return { before: toNodes(beforeRows), after: toNodes(rows.slice(i)) }
		}

		beforeRows.push(row)
		currentLength += rowLength
	}

	return { before: toNodes(beforeRows), after: [] }
}

```
</file>

<file path="./utils/markdownV2Text.js">
```javascript
import { decode } from "html-entities"

/**
 * Strips one inline markup delimiter pair from `node.value` and records where
 * the enclosed text sits as `{ begin, end }` ranges in `node.formatting[type]`.
 *
 * Mutates `node` in place: `node.value` loses the delimiters, and
 * `node.formatting` / `node.formatting[type]` are created on demand.
 *
 * @param {object} node - Node with a string `value`; `formatting` is optional.
 * @param {string} type - Key under `node.formatting` that collects the ranges.
 * @param {string} symbol_left - Opening delimiter, matched literally.
 * @param {string} [symbol_right=symbol_left] - Closing delimiter, matched literally.
 */
function parseFormatting(node, type, symbol_left, symbol_right = symbol_left) {
	if (!node.formatting) node.formatting = {}
	if (!node.formatting[type]) node.formatting[type] = []

	// Escape regex metacharacters so both delimiters are matched literally.
	const escaped_left = symbol_left.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
	const escaped_right = symbol_right.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

	// Pass 1: for every closed pair, shift the ranges already recorded (by earlier
	// calls) to account for the delimiter characters that pass 2 will remove.
	// The enclosed text must start and end with non-whitespace, so it is at least
	// two characters long; `.` does not cross line breaks.
	for (const match of node.value.matchAll(new RegExp(`${escaped_left}\\S.*?\\S${escaped_right}`, 'g'))) {
		const left = match.index
		const right = match.index + match[0].length - symbol_right.length
		fixFormattingRanges(node.formatting, -1, left, symbol_left.length)
		fixFormattingRanges(node.formatting, -1, right, symbol_right.length)
	}

	// Number of delimiter characters removed so far in the current replace pass;
	// converts a match offset in the old string into an offset in the new one.
	let removedLength = 0

	// Replace callback: records the range of the captured text and keeps only that text.
	const handle = (match, captured, index) => {
		const begin = index - removedLength
		const end = begin + captured.length
		node.formatting[type].push({ begin, end })

		removedLength += match.length - captured.length
		return captured
	}

	// Pass 2: remove the delimiters of every closed pair.
	node.value = node.value.replace(new RegExp(`${escaped_left}(\\S.*?\\S)${escaped_right}`, 'g'), handle)

	removedLength = 0

	// Pass 3: an opener with no closer. The pattern has no `g` flag, so only the
	// first such opener is handled; the range runs to the end of the string.
	// Ranges recorded earlier are not shifted for this removal.
	node.value = node.value.replace(new RegExp(`${escaped_left}(\\S.*)$`), handle)
}

// Passes that parseText runs, in this order, over a text node.
// HTML entities are decoded first; each later pass strips one delimiter style
// and records its ranges. The two-character delimiters (`**`, `~~`) are listed
// before their one-character counterparts — presumably so the longer form is
// consumed first (TODO confirm).
const formatting = [
	(node) => node.value = decode(node.value),
	(node) => parseFormatting(node, 'monospace', '`'),
	(node) => parseFormatting(node, 'bold', '**'),
	(node) => parseFormatting(node, 'italic', '*'),
	(node) => parseFormatting(node, 'italic', '_'),
	(node) => parseFormatting(node, 'strikethrough', '~~'),
	(node) => parseFormatting(node, 'strikethrough', '~'),
	(node) => parseFormatting(node, 'underline', '<u>', '</u>'),
	(node) => parseFormatting(node, 'spoiler', '<spoiler>', '</spoiler>')
]

/**
 * Visits every recorded range of every formatting type.
 *
 * @param {Object<string, Array<{begin: number, end: number}>>} formatting - Ranges keyed by type; a missing map is treated as empty.
 * @param {(range: object, type: string) => void} callback - Invoked once per range.
 */
function iterateFormatting(formatting, callback) {
	for (const kind in formatting) {
		for (const range of formatting[kind]) {
			callback(range, kind)
		}
	}
}

/**
 * Shifts recorded ranges by `diff`, repeated `count` times, relative to `index`.
 *
 * On each repetition a range's `begin` moves when it lies strictly after
 * `index`, and its `end` moves when it lies at or after `index`. Every
 * repetition compares against `index` itself, not against an advancing position.
 *
 * @param {object} formatting - Ranges keyed by formatting type; mutated in place.
 * @param {number} diff - Amount added per repetition (may be negative).
 * @param {number} index - Reference position in the text.
 * @param {number} [count=1] - Number of repetitions.
 */
function fixFormattingRanges(formatting, diff, index, count = 1) {
	const shiftRange = (range) => {
		if (range.begin > index) range.begin += diff
		if (range.end >= index) range.end += diff
	}

	for (let pass = 0; pass < count; pass++) {
		iterateFormatting(formatting, shiftRange)
	}
}

/**
 * Variant of fixFormattingRanges whose `end` comparison is strict: a range's
 * `end` moves only when it lies strictly after `index`.
 *
 * @param {object} formatting - Ranges keyed by formatting type; mutated in place.
 * @param {number} diff - Amount added per repetition (may be negative).
 * @param {number} index - Reference position in the text.
 * @param {number} [count=1] - Number of repetitions.
 */
function fixFormattingRangesV2(formatting, diff, index, count = 1) {
	const shiftRange = (range) => {
		if (range.begin > index) range.begin += diff
		if (range.end > index) range.end += diff
	}

	for (let pass = 0; pass < count; pass++) {
		iterateFormatting(formatting, shiftRange)
	}
}

/**
 * Maps a formatting type to the MarkdownV2 delimiter that markdownV2Text
 * inserts around the formatted range.
 *
 * @param {string} type - One of bold, italic, monospace, strikethrough, underline, spoiler.
 * @returns {string|undefined} The delimiter, or undefined for any other type.
 */
function symbolOfFormattingType(type) {
	const symbols = new Map([
		['bold', '*'],
		['italic', '_'],
		['monospace', '`'],
		['strikethrough', '~'],
		['underline', '__'],
		['spoiler', '||'],
	])

	return symbols.get(type)
}

/**
 * Backslash-escapes every MarkdownV2 reserved character in `node.value` and
 * keeps the recorded formatting ranges aligned with the longer text.
 *
 * The character class is restored here: markdown rendering had replaced the
 * `\\`, `\[` and `\]` entries with `$` signs, so backslashes and square
 * brackets were no longer escaped.
 *
 * @param {object} node - Node with `value` and `formatting`; mutated in place.
 */
function escapeMarkdownV2(node) {
	// Backslashes inserted so far; replace() reports offsets in the original string.
	let counter = 0
	node.value = node.value.replace(/[\\\[\]\-(){}+_*~`>#=|!.]/g, (match, index) => {
		fixFormattingRangesV2(node.formatting, +1, index + counter)
		counter++
		return '\\' + match
	})
}

/**
 * Runs every pass listed in `formatting`, in order, over the node.
 *
 * @param {object} node - Text node; mutated in place by the passes.
 * @returns {object} The same node.
 */
function parseText(node) {
	formatting.forEach((applyPass) => {
		applyPass(node)
	})
	return node
}

/**
 * Renders one text node as Telegram MarkdownV2.
 *
 * Steps: strip inline markup and record ranges (parseText), escape reserved
 * characters, re-insert MarkdownV2 delimiters at the recorded ranges, then
 * restore `[label](url)` / `![label](url)` constructs that escaping turned
 * into `\[label\]\(url\)`.
 *
 * The input node is not modified: its `formatting` ranges are copied, because
 * the steps above shift ranges in place and the same node is rendered more than
 * once (for length estimation and for output).
 *
 * @param {object} input_node - Text node with `value` and optional `formatting`.
 * @returns {string} MarkdownV2 text.
 */
export function markdownV2Text(input_node) {
	const node = { ...input_node }

	if (input_node.formatting) {
		node.formatting = {}
		for (const type in input_node.formatting) {
			node.formatting[type] = input_node.formatting[type].map((range) => ({ ...range }))
		}
	}

	parseText(node)
	escapeMarkdownV2(node)
	let text = node.value

	function insert(i, symbol) {
		fixFormattingRanges(node.formatting, +symbol.length, i)
		text = text.substring(0, i) + symbol + text.substring(i)
	}

	iterateFormatting(node.formatting, (f, type) => {
		insert(f.begin, symbolOfFormattingType(type))
		insert(f.end, symbolOfFormattingType(type))
	})

	// The patterns match the escaped brackets and parentheses (`\[`, `\]`, `\(`,
	// `\)`). Markdown rendering had replaced those escapes with `$` signs, so
	// the previous patterns could never match.
	text = text.replace(
		/\\!\\\[(\S|\S.*?\S)\\]\\\((\S|\S.*?\S)\\\)/g,
		(match, label, target) => `![${label}](${target})`
	)

	text = text.replace(
		/\\\[(\S|\S.*?\S)\\]\\\((\S|\S.*?\S)\\\)/g,
		(match, label, target) => `[${label}](${target})`
	)

	return text
}

```
</file>

Question

Thought

Answer

Share this Q&A