Skip to content
This repository was archived by the owner on May 12, 2026. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions src/cloud/lib/editor/htmlToMarkdown.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import { htmlContainsImage, htmlToMarkdown } from './htmlToMarkdown'

// Behavioural spec for the clipboard HTML -> Markdown conversion helpers.
describe('htmlToMarkdown', () => {
  // HTML without an <img src> must be left to the editor's default paste path.
  it('ignores html without images so normal paste behavior is preserved', () => {
    const plainHtml = '<p>Hello <strong>world</strong></p>'

    expect(htmlContainsImage(plainHtml)).toBe(false)
    expect(htmlToMarkdown(plainHtml)).toBe(null)
  })

  // Paragraphs become blank-line separated blocks; alt and title are kept.
  it('converts web clipboard html with images to markdown', () => {
    const clipboardHtml =
      '<p>Hello <strong>world</strong></p><p><img src="https://example.com/cat.png" alt="Cat photo" title="Source"></p>'
    const expectedMarkdown =
      'Hello **world**\n\n![Cat photo](https://example.com/cat.png "Source")'

    const actualMarkdown = htmlToMarkdown(clipboardHtml)

    expect(actualMarkdown).toBe(expectedMarkdown)
  })

  // Anchors stay links and <br> becomes a single line break.
  it('preserves links around pasted web content', () => {
    const clipboardHtml =
      '<div><a href="https://example.com/post">Read post</a><br><img src="https://example.com/post.jpg"></div>'
    const expectedMarkdown =
      '[Read post](https://example.com/post)\n![](https://example.com/post.jpg)'

    const actualMarkdown = htmlToMarkdown(clipboardHtml)

    expect(actualMarkdown).toBe(expectedMarkdown)
  })
})
139 changes: 139 additions & 0 deletions src/cloud/lib/editor/htmlToMarkdown.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/**
 * Upper-case tag names that are treated as block-level elements. The set is
 * matched against `element.tagName`; matching elements have their content
 * wrapped in blank lines.
 */
const blockTags = new Set<string>([
  'ADDRESS',
  'ARTICLE',
  'ASIDE',
  'BLOCKQUOTE',
  'DIV',
  'FIGURE',
  'FOOTER',
  'FORM',
  // H1 through H6
  ...Array.from({ length: 6 }, (_, index) => `H${index + 1}`),
  'HEADER',
  'HR',
  'MAIN',
  'P',
  'PRE',
  'SECTION',
  'TABLE',
])

/**
 * Prefixes every Markdown-significant punctuation character in `value` with a
 * backslash so the text is rendered literally.
 *
 * @param value Plain text to escape.
 * @returns The text with each special character backslash-escaped.
 */
const escapeMarkdown = (value: string) => {
  const specialCharacters = /[\\`*_{}[\]()#+\-.!|>]/g
  return value.replace(specialCharacters, (character) => `\\${character}`)
}

/**
 * Escapes text for use inside a double-quoted Markdown link or image title.
 *
 * Both `"` and `\` are backslash-escaped. Escaping only the quote is not
 * enough: a title that ends with a backslash would otherwise escape the
 * closing quote and break the title.
 *
 * @param value Raw title text (for example an `<img title>` attribute value).
 * @returns The text, safe to place between double quotes in a link title.
 */
const escapeLinkTitle = (value: string) => {
  return value.replace(/[\\"]/g, '\\$&')
}

/**
 * Collapses every run of whitespace (spaces, tabs, line breaks, ...) into a
 * single space. Leading and trailing runs are collapsed too, not removed.
 *
 * @param value Text to normalize.
 * @returns The text with each whitespace run replaced by one space.
 */
const normalizeInlineWhitespace = (value: string) => {
  const pieces = value.split(/\s+/)
  return pieces.join(' ')
}

/**
 * Formats block-level content: the trimmed text surrounded by a blank line on
 * each side, or an empty string when nothing but whitespace remains.
 *
 * @param value Markdown produced for the block's children.
 * @returns The padded block, or `''` for empty content.
 */
const joinBlock = (value: string) => {
  const content = value.trim()
  if (content === '') {
    return ''
  }
  return '\n\n' + content + '\n\n'
}

/**
 * Formats list-item content as a single `- ` bullet line terminated by a
 * newline, or an empty string when the content is blank.
 *
 * @param value Markdown produced for the item's children.
 * @returns The bullet line, or `''` for empty content.
 */
const listItem = (value: string) => {
  const content = value.trim()
  if (content === '') {
    return ''
  }
  return '- ' + content + '\n'
}

/**
 * Percent-encodes the characters that cannot appear verbatim in an inline
 * Markdown link destination: whitespace ends the destination, parentheses can
 * unbalance the surrounding `(...)`, and angle brackets introduce the `<...>`
 * destination form.
 *
 * @param url Trimmed `href` / `src` attribute value.
 * @returns The URL with only those characters percent-encoded.
 */
const encodeLinkDestination = (url: string): string => {
  return url.replace(/[\s()<>]/g, (character) => {
    // encodeURIComponent leaves parentheses untouched, so map them by hand.
    if (character === '(') {
      return '%28'
    }
    if (character === ')') {
      return '%29'
    }
    return encodeURIComponent(character)
  })
}

/**
 * Builds an inline code span from raw (unescaped) text.
 *
 * Backslash escapes have no effect inside code spans, so the text is emitted
 * verbatim and the backtick delimiter is made one longer than the longest
 * backtick run in the text. Whitespace runs are collapsed to a single space.
 *
 * @param rawText Text content of the `<code>` element.
 * @returns The code span, or `''` when the text is blank.
 */
const toCodeSpan = (rawText: string): string => {
  const code = rawText.replace(/\s+/g, ' ').trim()
  if (code === '') {
    return ''
  }
  const backtickRuns: string[] = code.match(/`+/g) ?? []
  const longestRun = backtickRuns.reduce(
    (longest, run) => Math.max(longest, run.length),
    0
  )
  const delimiter = '`'.repeat(longestRun + 1)
  // A space keeps a leading/trailing backtick from merging with the delimiter.
  const padding = code.startsWith('`') || code.endsWith('`') ? ' ' : ''
  return `${delimiter}${padding}${code}${padding}${delimiter}`
}

/**
 * Recursively converts a DOM node into Markdown.
 *
 * - Text nodes: whitespace is collapsed and Markdown punctuation is escaped.
 * - Nodes that are neither text nor elements produce nothing.
 * - `<script>` / `<style>`: skipped, their text is not document content.
 * - `<br>`: a single newline.
 * - `<img>`: `![alt](src "title")`; images without a usable `src` are dropped.
 * - `<code>`: inline code span built from the element's raw text.
 * - `<a>`: `[label](href)`; without an `href` only the children are kept.
 * - `<strong>/<b>` and `<em>/<i>`: `**...**` and `_..._`.
 * - `<li>`: formatted by `listItem`; `<ul>`, `<ol>` and every tag in
 *   `blockTags` are formatted by `joinBlock`.
 * - Any other element contributes only its children.
 *
 * @param node DOM node to convert.
 * @returns The Markdown for `node` and its descendants (possibly empty).
 */
const nodeToMarkdown = (node: Node): string => {
  if (node.nodeType === Node.TEXT_NODE) {
    return escapeMarkdown(normalizeInlineWhitespace(node.textContent || ''))
  }

  if (node.nodeType !== Node.ELEMENT_NODE) {
    return ''
  }

  const element = node as HTMLElement
  const tagName = element.tagName

  // Script and stylesheet source must never leak into the pasted text.
  if (tagName === 'SCRIPT' || tagName === 'STYLE') {
    return ''
  }

  if (tagName === 'BR') {
    return '\n'
  }

  if (tagName === 'IMG') {
    const src = element.getAttribute('src')
    if (src == null || src.trim() === '') {
      return ''
    }
    const alt = escapeMarkdown(element.getAttribute('alt') || '')
    const title = element.getAttribute('title')
    const titlePart =
      title != null && title.trim() !== ''
        ? ` "${escapeLinkTitle(title.trim())}"`
        : ''
    return `![${alt}](${encodeLinkDestination(src.trim())}${titlePart})`
  }

  // Handled before recursing: code needs the raw text, not escaped children.
  if (tagName === 'CODE') {
    return toCodeSpan(element.textContent || '')
  }

  const children = Array.from(element.childNodes)
    .map((child) => nodeToMarkdown(child))
    .join('')

  if (tagName === 'A') {
    const href = element.getAttribute('href')
    if (href == null || href.trim() === '') {
      return children
    }
    // Fall back to the URL itself when the anchor has no visible content.
    const label = children.trim() || href.trim()
    return `[${label}](${encodeLinkDestination(href.trim())})`
  }

  if (tagName === 'STRONG' || tagName === 'B') {
    const trimmed = children.trim()
    return trimmed.length > 0 ? `**${trimmed}**` : ''
  }

  if (tagName === 'EM' || tagName === 'I') {
    const trimmed = children.trim()
    return trimmed.length > 0 ? `_${trimmed}_` : ''
  }

  if (tagName === 'LI') {
    return listItem(children)
  }

  if (tagName === 'UL' || tagName === 'OL') {
    return joinBlock(children)
  }

  if (blockTags.has(tagName)) {
    return joinBlock(children)
  }

  return children
}

/**
 * Reports whether an HTML string contains an `<img>` tag that carries a `src`
 * attribute. The check is a case-insensitive pattern match on the markup; the
 * HTML is not parsed.
 *
 * @param html Raw HTML, for example the clipboard's `text/html` payload.
 * @returns `true` when such a tag is found, otherwise `false`.
 */
export const htmlContainsImage = (html: string) => {
  const imageWithSource = /<img\b[^>]*\bsrc\s*=/i
  return html.search(imageWithSource) !== -1
}

/**
 * Converts an HTML string to Markdown, but only when it contains an image.
 *
 * HTML without an `<img src>` yields `null` so the caller can fall back to its
 * regular handling. Otherwise the markup is parsed with `DOMParser`, every
 * top-level body node is converted, trailing spaces and tabs before line
 * breaks are removed, and runs of three or more newlines are reduced to a
 * single blank line.
 *
 * @param html Raw HTML, for example the clipboard's `text/html` payload.
 * @returns The Markdown text, or `null` when there is no image or the
 *   conversion produced nothing.
 */
export const htmlToMarkdown = (html: string): string | null => {
  if (htmlContainsImage(html) === false) {
    return null
  }

  const parsedDocument = new DOMParser().parseFromString(html, 'text/html')

  let converted = ''
  for (const topLevelNode of Array.from(parsedDocument.body.childNodes)) {
    converted += nodeToMarkdown(topLevelNode)
  }

  const withoutTrailingSpaces = converted.replace(/[ \t]+\n/g, '\n')
  const withCollapsedBlankLines = withoutTrailingSpaces.replace(
    /\n{3,}/g,
    '\n\n'
  )
  const result = withCollapsedBlankLines.trim()

  if (result.length === 0) {
    return null
  }
  return result
}
12 changes: 12 additions & 0 deletions src/cloud/lib/editor/plugins/fileHandler.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { boostHubBaseUrl } from '../../consts'
import { htmlToMarkdown } from '../htmlToMarkdown'

/**
 * Signature of an asynchronous callback that receives a `File` and resolves
 * to a `FileNode`, or to `null`.
 */
export type OnFileCallback = (file: File) => Promise<FileNode | null>

Expand Down Expand Up @@ -64,6 +65,17 @@ const attachFileHandlerToCodeMirrorEditor = (
for (let i = 0; i < files.length; i++) {
await handler(i > 0 ? instance.getCursor() : pos, files[i])
}
return
}

const html = event.clipboardData?.getData('text/html')
if (html != null && html !== '') {
const markdown = htmlToMarkdown(html)
if (markdown != null) {
event.stopPropagation()
event.preventDefault()
instance.replaceSelection(markdown, 'end')
}
}
}
)
Expand Down