Add function to parse HTML into editor input

This commit is contained in:
Ajay Bura 2023-10-12 13:33:29 +05:30
parent 5940cf24a0
commit e1004baaa6
8 changed files with 334 additions and 15 deletions

2
package-lock.json generated
View file

@ -23,6 +23,7 @@
"classnames": "2.3.2",
"dateformat": "5.0.3",
"dayjs": "1.11.10",
"domhandler": "5.0.3",
"emojibase": "6.1.0",
"emojibase-data": "7.0.1",
"file-saver": "2.0.5",
@ -30,6 +31,7 @@
"focus-trap-react": "10.0.2",
"folds": "1.5.0",
"formik": "2.2.9",
"html-dom-parser": "4.0.0",
"html-react-parser": "4.2.0",
"immer": "9.0.16",
"is-hotkey": "0.2.0",

View file

@ -33,6 +33,7 @@
"classnames": "2.3.2",
"dateformat": "5.0.3",
"dayjs": "1.11.10",
"domhandler": "5.0.3",
"emojibase": "6.1.0",
"emojibase-data": "7.0.1",
"file-saver": "2.0.5",
@ -40,6 +41,7 @@
"focus-trap-react": "10.0.2",
"folds": "1.5.0",
"formik": "2.2.9",
"html-dom-parser": "4.0.0",
"html-react-parser": "4.2.0",
"immer": "9.0.16",
"is-hotkey": "0.2.0",

View file

@ -5,3 +5,4 @@ export * from './Elements';
export * from './keyboard';
export * from './output';
export * from './Toolbar';
export * from './input';

View file

@ -0,0 +1,301 @@
/* eslint-disable no-param-reassign */
import { Descendant, Text } from 'slate';
import parse from 'html-dom-parser';
import { ChildNode, Element, isText, isTag } from 'domhandler';
import { sanitizeCustomHtml } from '../../utils/sanitize';
import { BlockType, MarkType } from './Elements';
import {
BlockQuoteElement,
CodeBlockElement,
CodeLineElement,
EmoticonElement,
HeadingElement,
HeadingLevel,
InlineElement,
ListItemElement,
MentionElement,
OrderedListElement,
QuoteLineElement,
UnorderedListElement,
} from './slate';
import { parseMatrixToUrl } from '../../utils/matrix';
import { createEmoticonElement, createMentionElement } from './common';
/**
 * Maps an HTML tag name to the text mark it applies in the editor.
 *
 * Both spellings of each formatting tag are listed (`b`/`strong`, `i`/`em`,
 * `s`/`del`) so markup written by other clients keeps its formatting.
 * `span` is only a spoiler candidate and `code` only an inline-code
 * candidate; `elementToTextMark` applies the extra checks for those two.
 */
const markNodeToType: Record<string, MarkType> = {
  b: MarkType.Bold,
  strong: MarkType.Bold,
  i: MarkType.Italic,
  em: MarkType.Italic,
  u: MarkType.Underline,
  s: MarkType.StrikeThrough,
  del: MarkType.StrikeThrough,
  code: MarkType.Code,
  span: MarkType.Spoiler,
};
/**
 * Resolves the text mark an element applies to the text it contains.
 *
 * @param node - Element to inspect.
 * @returns The mark for this element, or `undefined` when the tag is not a
 * formatting tag, is a `<span>` without the `data-mx-spoiler` attribute, or is
 * a `<code>` whose direct parent is `<pre>`.
 */
const elementToTextMark = (node: Element): MarkType | undefined => {
  const markType = markNodeToType[node.name];
  if (!markType) return undefined;

  // Test for presence, not truthiness: a spoiler without a reason is written as
  // a valueless attribute, which the parser exposes as an empty string.
  if (markType === MarkType.Spoiler && node.attribs['data-mx-spoiler'] === undefined) {
    return undefined;
  }

  // `<pre><code>` is a code block, not inline code, so it carries no mark.
  if (
    markType === MarkType.Code &&
    node.parent &&
    'name' in node.parent &&
    node.parent.name === 'pre'
  ) {
    return undefined;
  }

  return markType;
};
/**
 * Collects the plain text contained in a DOM node.
 *
 * @param node - Node to read.
 * @returns The node's own data for a text node, the concatenated text of all
 * descendants for a tag, and an empty string for any other node kind.
 */
const parseNodeText = (node: ChildNode): string => {
  if (isText(node)) {
    return node.data;
  }
  if (isTag(node)) {
    // The joined text must be returned; without the return every tag
    // collapsed to an empty string.
    return node.children.map((child) => parseNodeText(child)).join('');
  }
  return '';
};
/**
 * Converts an element into a standalone inline editor node when it is a
 * custom emoticon image or a matrix.to link.
 *
 * @param node - Element to inspect.
 * @returns An emoticon element for `<img data-mx-emoticon src=...>`, a mention
 * element for an `<a>` whose `href` is recognised by `parseMatrixToUrl`, or
 * `undefined` for everything else.
 */
const elementToInlineNode = (node: Element): MentionElement | EmoticonElement | undefined => {
  // `data-mx-emoticon` is normally a valueless attribute (parsed as an empty
  // string), so check for presence rather than truthiness.
  if (node.name === 'img' && node.attribs['data-mx-emoticon'] !== undefined) {
    // Images carry their source in `src`; `href` is never set on <img>.
    const { src, alt } = node.attribs;
    if (!src) return undefined;
    return createEmoticonElement(src, alt || 'Unknown Emoji');
  }
  if (node.name === 'a') {
    const { href } = node.attribs;
    if (typeof href !== 'string') return undefined;
    const [mxId] = parseMatrixToUrl(href);
    if (mxId) {
      // The id is passed for both of the first two arguments.
      // NOTE(review): presumably id and display name - confirm against createMentionElement.
      return createMentionElement(mxId, mxId, false);
    }
  }
  return undefined;
};
/**
 * Flattens a DOM node into the inline editor nodes it represents.
 *
 * Text nodes become plain text leaves. A formatting element contributes its
 * parsed children with the element's mark set on every text leaf. An element
 * recognised by `elementToInlineNode` becomes that single inline node. Any
 * other element (including an `<a>` that is not a mention) is transparent and
 * contributes its parsed children. Non-text, non-tag nodes contribute nothing.
 */
const parseInlineNodes = (node: ChildNode): InlineElement[] => {
  if (isText(node)) return [{ text: node.data }];
  if (!isTag(node)) return [];

  const mark = elementToTextMark(node);
  if (mark) {
    const marked = node.children.flatMap(parseInlineNodes);
    marked.forEach((item) => {
      // Only text leaves can carry marks; inline elements are left untouched.
      if (Text.isText(item)) item[mark] = true;
    });
    return marked;
  }

  const standalone = elementToInlineNode(node);
  return standalone ? [standalone] : node.childNodes.flatMap(parseInlineNodes);
};
/**
 * Converts a `<blockquote>` element into a block-quote editor element.
 *
 * Each `<p>` child becomes its own quote line. Loose text and inline children
 * are gathered into a pending line, which is emitted when a `<br>` or `<p>` is
 * reached and once more at the end. An empty pending line is never emitted.
 */
const parseBlockquoteNode = (node: Element): BlockQuoteElement => {
  const quoteLines: QuoteLineElement[] = [];
  let pending: InlineElement[] = [];

  const flushPending = () => {
    if (pending.length > 0) {
      quoteLines.push({ type: BlockType.QuoteLine, children: pending });
      pending = [];
    }
  };

  node.children.forEach((child) => {
    if (isText(child)) {
      pending.push({ text: child.data });
    } else if (isTag(child)) {
      switch (child.name) {
        case 'br':
          flushPending();
          break;
        case 'p':
          flushPending();
          quoteLines.push({
            type: BlockType.QuoteLine,
            children: child.children.flatMap(parseInlineNodes),
          });
          break;
        default:
          pending.push(...parseInlineNodes(child));
      }
    }
  });
  flushPending();

  return { type: BlockType.BlockQuote, children: quoteLines };
};
/**
 * Converts a `<pre>` element into a code-block editor element.
 *
 * The text obtained from `parseNodeText` is split on `\n`, and every piece
 * (including empty ones) becomes one code line holding a single text leaf.
 */
const parseCodeBlockNode = (node: Element): CodeBlockElement => {
  const toCodeLine = (text: string): CodeLineElement => ({
    type: BlockType.CodeLine,
    children: [{ text }],
  });

  return {
    type: BlockType.CodeBlock,
    children: parseNodeText(node)
      .split('\n')
      .map((lineText) => toCodeLine(lineText)),
  };
};
/**
 * Converts an `<ol>` or `<ul>` element into the matching list editor element.
 *
 * Each `<li>` child becomes its own list item. Loose text and inline children
 * are gathered into a pending item, which is emitted when a `<br>` or `<li>`
 * is reached and once more at the end. An empty pending item is never emitted.
 * The result is an ordered list only when the tag name is `ol`.
 */
const parseListNode = (node: Element): OrderedListElement | UnorderedListElement => {
  const items: ListItemElement[] = [];
  let pending: InlineElement[] = [];

  const flushPending = () => {
    if (pending.length > 0) {
      items.push({ type: BlockType.ListItem, children: pending });
      pending = [];
    }
  };

  node.children.forEach((child) => {
    if (isText(child)) {
      pending.push({ text: child.data });
    } else if (isTag(child)) {
      switch (child.name) {
        case 'br':
          flushPending();
          break;
        case 'li':
          flushPending();
          items.push({
            type: BlockType.ListItem,
            children: child.children.flatMap(parseInlineNodes),
          });
          break;
        default:
          pending.push(...parseInlineNodes(child));
      }
    }
  });
  flushPending();

  return {
    type: node.name === 'ol' ? BlockType.OrderedList : BlockType.UnorderedList,
    children: items,
  };
};
/**
 * Converts an `<h1>`-`<h6>` element into a heading editor element.
 *
 * The level is taken from the digit in the tag name. Levels above 3 are
 * clamped to 3, and a tag name that does not match falls back to 3.
 */
const parseHeadingNode = (node: Element): HeadingElement => {
  const matched = node.name.match(/^h([123456])$/);
  const digit = matched === null ? '3' : matched[1];
  const parsedLevel = parseInt(digit, 10);
  const clampedLevel = parsedLevel <= 3 ? parsedLevel : 3;

  return {
    type: BlockType.Heading,
    level: clampedLevel as HeadingLevel,
    children: node.children.flatMap(parseInlineNodes),
  };
};
/**
 * Converts top-level DOM nodes into editor block nodes.
 *
 * `<p>`, `<blockquote>`, `<pre>`, `<ol>`/`<ul>` and `<h1>`-`<h6>` each produce
 * one block. Loose text and inline nodes are gathered into a pending
 * paragraph, which is emitted when a `<br>` or a block element is reached and
 * once more at the end. An empty pending paragraph is never emitted.
 */
export const domToEditorInput = (domNodes: ChildNode[]): Descendant[] => {
  const blocks: Descendant[] = [];
  let pending: InlineElement[] = [];

  const flushPending = () => {
    if (pending.length > 0) {
      blocks.push({ type: BlockType.Paragraph, children: pending });
      pending = [];
    }
  };

  // Builds the block for a block-level tag, or undefined for any other tag.
  const toBlock = (el: Element): Descendant | undefined => {
    switch (el.name) {
      case 'p':
        return {
          type: BlockType.Paragraph,
          children: el.children.flatMap(parseInlineNodes),
        };
      case 'blockquote':
        return parseBlockquoteNode(el);
      case 'pre':
        return parseCodeBlockNode(el);
      case 'ol':
      case 'ul':
        return parseListNode(el);
      default:
        return el.name.match(/^h[123456]$/) ? parseHeadingNode(el) : undefined;
    }
  };

  domNodes.forEach((node) => {
    if (isText(node)) {
      pending.push({ text: node.data });
      return;
    }
    if (!isTag(node)) return;

    if (node.name === 'br') {
      flushPending();
      return;
    }

    const block = toBlock(node);
    if (block) {
      flushPending();
      blocks.push(block);
      return;
    }

    pending.push(...parseInlineNodes(node));
  });
  flushPending();

  return blocks;
};
/**
 * Turns an untrusted HTML string into editor nodes: the string is passed
 * through `sanitizeCustomHtml`, parsed into DOM nodes, and converted with
 * `domToEditorInput`.
 */
export const htmlToEditorInput = (unsafeHtml: string) =>
  domToEditorInput(parse(sanitizeCustomHtml(unsafeHtml)));

View file

@ -1,7 +1,8 @@
import { Descendant, Text } from 'slate';
import { sanitizeText } from '../../utils/sanitize';
import { BlockType } from './Elements';
import { CustomElement, FormattedText } from './slate';
import { CustomElement } from './slate';
import { parseInlineMD } from '../../utils/markdown';
export type OutputOptions = {
@ -9,7 +10,7 @@ export type OutputOptions = {
allowMarkdown?: boolean;
};
const textToCustomHtml = (node: FormattedText, opts: OutputOptions): string => {
const textToCustomHtml = (node: Text, opts: OutputOptions): string => {
let string = sanitizeText(node.text);
if (opts.allowTextFormatting) {
if (node.bold) string = `<strong>${string}</strong>`;
@ -47,6 +48,7 @@ const elementToCustomHtml = (node: CustomElement, children: string): string => {
return `<ol>${children}</ol>`;
case BlockType.UnorderedList:
return `<ul>${children}</ul>`;
case BlockType.Mention:
return `<a href="https://matrix.to/#/${node.id}">${node.name}</a>`;
case BlockType.Emoticon:

View file

@ -23,13 +23,9 @@ export type FormattedText = Text & {
export type LinkElement = {
type: BlockType.Link;
href: string;
children: FormattedText[];
};
export type SpoilerElement = {
type: 'spoiler';
alert?: string;
children: FormattedText[];
children: Text[];
};
export type MentionElement = {
type: BlockType.Mention;
id: string;
@ -44,14 +40,16 @@ export type EmoticonElement = {
children: Text[];
};
export type InlineElement = Text | LinkElement | MentionElement | EmoticonElement;
export type ParagraphElement = {
type: BlockType.Paragraph;
children: FormattedText[];
children: InlineElement[];
};
export type HeadingElement = {
type: BlockType.Heading;
level: HeadingLevel;
children: FormattedText[];
children: InlineElement[];
};
export type CodeLineElement = {
type: BlockType.CodeLine;
@ -63,7 +61,7 @@ export type CodeBlockElement = {
};
export type QuoteLineElement = {
type: BlockType.QuoteLine;
children: FormattedText[];
children: InlineElement[];
};
export type BlockQuoteElement = {
type: BlockType.BlockQuote;
@ -71,7 +69,7 @@ export type BlockQuoteElement = {
};
export type ListItemElement = {
type: BlockType.ListItem;
children: FormattedText[];
children: InlineElement[];
};
export type OrderedListElement = {
type: BlockType.OrderedList;
@ -84,7 +82,6 @@ export type UnorderedListElement = {
export type CustomElement =
| LinkElement
// | SpoilerElement
| MentionElement
| EmoticonElement
| ParagraphElement

View file

@ -28,6 +28,15 @@ export const isRoomId = (id: string): boolean => validMxId(id) && id.startsWith(
/** Returns true when `id` passes `validMxId` and starts with `#`. */
export const isRoomAlias = (id: string): boolean => {
  if (!validMxId(id)) return false;
  return id.startsWith('#');
};
/**
 * Extracts the Matrix identifier and the optional `via` query from a
 * matrix.to permalink.
 *
 * @param url - Link to inspect; it is percent-decoded before matching.
 * @returns `[mxId, via]`. Both entries are `undefined` when the link is not a
 * matrix.to permalink or cannot be percent-decoded; `via` alone is `undefined`
 * when the link has no `?via=` part.
 */
export const parseMatrixToUrl = (url: string): [string | undefined, string | undefined] => {
  let href: string;
  try {
    href = decodeURIComponent(url);
  } catch {
    // decodeURIComponent throws URIError on malformed escapes (e.g. "%E0%A4%A").
    // Links come from untrusted message HTML, so treat that as "not a match".
    return [undefined, undefined];
  }

  // The dot in the host is escaped so only the literal "matrix.to" matches.
  const match = href.match(/^https?:\/\/matrix\.to\/#\/([@!$+#]\S+:[^\\?|^\s\/]+)(\?(via=\S+))?/);
  if (!match) return [undefined, undefined];

  const [, g1AsMxId, , g3AsVia] = match;
  return [g1AsMxId, g3AsVia];
};
/**
 * Looks up the first room returned by `mx.getRooms()` whose canonical alias
 * equals `alias`. Returns `undefined` when no room matches or when
 * `getRooms()` yields a nullish value.
 */
export const getRoomWithCanonicalAlias = (mx: MatrixClient, alias: string): Room | undefined => {
  const rooms = mx.getRooms();
  return rooms?.find((candidate) => candidate.getCanonicalAlias() === alias);
};

View file

@ -56,12 +56,17 @@ const permittedTagToAttributes = {
'data-mx-maths',
'data-mx-pill',
'data-mx-ping',
'data-md',
],
div: ['data-mx-maths'],
a: ['name', 'target', 'href', 'rel'],
a: ['name', 'target', 'href', 'rel', 'data-md'],
img: ['width', 'height', 'alt', 'title', 'src', 'data-mx-emoticon'],
ol: ['start'],
code: ['class'],
code: ['class', 'data-md'],
strong: ['data-md'],
i: ['data-md'],
u: ['data-md'],
s: ['data-md'],
};
const transformFontTag: Transformer = (tagName, attribs) => ({