Richer card rendering using data from Wiktionary

The choice to pull rendered HTML instead of wiki markup from
Wiktionary was made because the markup would have required
a lot of code to expand its template data into user-readable
content.
This commit is contained in:
Thiago Chaves 2022-07-17 22:11:06 +03:00
parent d4b838106a
commit ec836a8465
12 changed files with 254 additions and 61 deletions

View File

@ -1,6 +1,11 @@
/** @type {import('next').NextConfig} */
const nextConfig = {
reactStrictMode: true,
}
experimental: {
images: {
unoptimized: true,
},
},
};
module.exports = nextConfig
module.exports = nextConfig;

View File

@ -0,0 +1,11 @@
import { Expression } from "../../model";
/** Props accepted by the ExpressionDescription component. */
export interface ExpressionDescriptionProps {
// Expression whose `description` is rendered; the component treats it as an HTML string.
expression: Expression;
}
/**
 * Renders the description of an expression inside a wrapper `<div>`.
 *
 * The description is inserted as raw inner HTML rather than as text, so any
 * markup it contains is rendered as-is.
 *
 * NOTE(review): nothing is escaped here, so the description must already be
 * trusted or sanitized by whoever produced it — confirm upstream.
 */
export function ExpressionDescription({
  expression,
}: ExpressionDescriptionProps) {
  const markup = { __html: expression.description };

  return <div dangerouslySetInnerHTML={markup} />;
}

View File

@ -0,0 +1 @@
export * from "./ExpressionDescription";

6
src/mock/kivi.ts Normal file

File diff suppressed because one or more lines are too long

View File

@ -1,3 +1,4 @@
import { parseWiktionaryData } from "../model/parseWiktionaryData";
import {
Category,
Expression,
@ -5,31 +6,26 @@ import {
ExpressionToCategory,
ExpressionToExpressionSet,
} from "../model/types";
import { KiviRaw } from "./kivi";
import { PuhuaRaw } from "./puhua";
interface RawExpressionDataItem {
prompt: string;
description: string;
category: string;
expression_set: string;
data: string;
}
const RawExpressionData: RawExpressionDataItem[] = [
["kivi", KiviRaw],
["puhua", PuhuaRaw],
].map(([prompt, data]) => ({
prompt,
data,
}));
interface RawExpressionSetItem {
name: string;
description: string;
}
const RawExpressionData: RawExpressionDataItem[] = [
["koira", "noun", "daily", "dog, dog paddle (swimming stroke)"],
["liite", "noun", "daily", "attachment, appendix, affix, supplement"],
["havaita", "verb", "daily", "to observe, to detect, to perceive"],
["tukea", "verb", "daily", "to support, finance, sponsor, substantiate"],
].map(([prompt, category, expression_set, description]) => ({
prompt,
description,
category,
expression_set,
}));
const RawExpressionSetData: RawExpressionSetItem[] = [
["daily", "New expressions and poorly remembered expressions"],
["weekly", "Expressions to be reviewed at the end of the week"],
@ -38,6 +34,34 @@ const RawExpressionSetData: RawExpressionSetItem[] = [
["ancient", "Expressions here should be memorized by now"],
].map(([name, description]) => ({ name, description }));
// Category names used to seed the mock data. They are handed to parseRawData
// as `raw_category_data`, which turns every entry into a Category whose
// description is the name itself.
// NOTE(review): these look like Wiktionary part-of-speech headings — confirm.
const RawCategories: string[] = [
"noun",
"verb",
"adjective",
"adverb",
"determiner",
"article",
"preposition",
"conjunction",
"proper noun",
"letter",
"character",
"phrase",
"proverb",
"idiom",
"symbol",
"syllable",
"numeral",
"initialism",
"interjection",
"definitions",
"pronoun",
"particle",
"predicative",
"participle",
"suffix",
];
interface MockData {
// Tables
categories: Category[];
@ -49,25 +73,22 @@ interface MockData {
expression_to_expression_set: ExpressionToExpressionSet[];
}
export function parseRawData(
raw_expression_data: RawExpressionDataItem[],
raw_expression_set_data: RawExpressionSetItem[]
): MockData {
const category_names = new Set(
raw_expression_data.map((item) => item.category)
);
const categories: Category[] = Array.from(category_names).map((name) => ({
/** Named arguments accepted by parseRawData. */
interface ParseRawDataArgs {
// Category names; each one becomes a Category with an identical description.
raw_category_data: string[];
// Raw items whose `prompt` and `data` are handed to parseWiktionaryData.
raw_expression_data: RawExpressionDataItem[];
// Name/description pairs that become the expression sets.
raw_expression_set_data: RawExpressionSetItem[];
}
export function parseRawData({
raw_category_data,
raw_expression_data,
raw_expression_set_data,
}: ParseRawDataArgs): MockData {
const categories: Category[] = raw_category_data.map((name) => ({
name,
description: name,
}));
const expressions: Expression[] = raw_expression_data.map(
({ prompt, description }) => ({
prompt,
description,
})
);
const expression_sets: ExpressionSet[] = raw_expression_set_data.map(
({ name, description }) => ({
name,
@ -75,48 +96,61 @@ export function parseRawData(
})
);
const expression_to_category: ExpressionToCategory[] =
raw_expression_data.map((item, index) =>
matchExpressionAndCategory(index + 1, item, categories)
);
const expressions: Expression[] = [];
const expression_to_category: ExpressionToCategory[] = [];
const expression_to_expression_set: ExpressionToExpressionSet[] = [];
for (const { prompt, data } of raw_expression_data) {
console.log("Parsing", { prompt, data });
const expression = parseWiktionaryData(prompt, data);
if (!expression) continue;
const expression_to_expression_set: ExpressionToExpressionSet[] =
raw_expression_data.map((item, index) =>
matchExpressionAndExpressionSet(index + 1, item, expression_sets)
expressions.push(expression);
const expression_id = expressions.length;
/*
expression_to_category.push(
matchExpressionAndCategory({
expression_id,
category_name,
categories,
})
);
*/
expression_to_expression_set.push({
expression_id,
expression_set_id: 1,
});
}
return {
categories,
expressions,
expression_sets,
categories,
expression_to_category,
expression_to_expression_set,
};
}
function matchExpressionAndCategory(
expression_id: number,
{ category }: RawExpressionDataItem,
categories: Category[]
): ExpressionToCategory {
const category_id = categories.findIndex(({ name }) => name === category) + 1;
/** Named arguments accepted by matchExpressionAndCategory. */
interface MatchExpressionAndCategoryParams {
// Id of the expression being linked; copied unchanged into the result.
expression_id: number;
// Category name that is compared against `Category.name` with strict equality.
category_name: string;
// Categories to search; the 1-based position of the match becomes the category id.
categories: Category[];
}
/**
 * Links an expression to a category by name.
 *
 * The category id is the 1-based position of the first category whose name
 * equals `category_name`. When no category matches, the id is 0.
 *
 * @param expression_id - id copied unchanged into the returned link
 * @param category_name - name to look up among `categories`
 * @param categories - categories to search, in id order
 * @returns the expression/category link
 */
function matchExpressionAndCategory({
  expression_id,
  category_name,
  categories,
}: MatchExpressionAndCategoryParams): ExpressionToCategory {
  const names = categories.map((category) => category.name);
  // indexOf yields -1 when absent, which turns into the id 0 after the shift
  const position = names.indexOf(category_name);

  return { category_id: position + 1, expression_id };
}
function matchExpressionAndExpressionSet(
expression_id: number,
{ expression_set }: RawExpressionDataItem,
expression_sets: ExpressionSet[]
): ExpressionToExpressionSet {
const expression_set_id =
expression_sets.findIndex(({ name }) => name === expression_set) + 1;
return {
expression_id,
expression_set_id,
};
}
export const MockData = parseRawData(RawExpressionData, RawExpressionSetData);
export const MockData = parseRawData({
raw_category_data: RawCategories,
raw_expression_data: RawExpressionData,
raw_expression_set_data: RawExpressionSetData,
});

5
src/mock/puhua.ts Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,90 @@
import { Expression } from "./types";
/**
 * Builds an Expression from a raw Wiktionary response.
 *
 * `data` is a JSON string whose `parse.text["*"]` member holds the page HTML.
 * The HTML is parsed, the element with id "Finnish" is located, and the
 * siblings following that element's parent are copied (through safeTreeClone)
 * into a `<div class="wiktionary-content">` until an `<hr>`, an `<h2>` or a
 * references block is reached. Styles, "noprint" and "inflection-table"
 * elements and declension/conjugation blocks are left out.
 *
 * @param prompt - the word being described; copied into the result
 * @param data - JSON text of the Wiktionary response
 * @returns the expression, or `null` when running without a `window`, when
 *          the response carries no HTML, or when there is no Finnish section
 * @throws SyntaxError when `data` is not valid JSON
 */
export function parseWiktionaryData(
  prompt: string,
  data: string
): Expression | null {
  // Prevent next.js SSR from crashing here
  if (typeof window === "undefined") return null;

  // A response without the HTML payload cannot be rendered; report it the
  // same way as a missing section instead of failing on a property access.
  const html: unknown = JSON.parse(data)?.parse?.text?.["*"];
  if (typeof html !== "string") return null;

  // Named `parsed` so it does not shadow the global `document`
  const parsed = new DOMParser().parseFromString(html, "text/html");

  // TODO settings-based language selection
  const header = parsed.getElementById("Finnish")?.parentElement;
  if (!header) return null;

  const content = parsed.createElement("div");
  content.className = "wiktionary-content";

  // Walk the window of wanted elements, skipping unwanted ones inside it
  for (
    let current = header.nextElementSibling;
    current;
    current = current.nextElementSibling
  ) {
    const tag = current.tagName.toLowerCase();
    if (tag === "hr" || tag === "h2" || isReferences(current)) break;

    const unwanted =
      tag === "style" ||
      current.classList.contains("noprint") ||
      current.classList.contains("inflection-table") ||
      isDeclension(current);
    if (unwanted) continue;

    // safeTreeClone returns null for elements it drops; append() would turn
    // a null argument into the literal text "null", so it must be skipped.
    const cloned = safeTreeClone(current);
    if (cloned) content.append(cloned);
  }

  return {
    prompt,
    description: content.outerHTML,
  };
}
/**
 * Reports whether an element, or any element nested inside it, has an id
 * containing "Conjugation" or "Declension".
 *
 * @param element - root of the subtree to inspect; may be omitted
 * @returns `true` when such an id is found, `false` otherwise (including when
 *          no element is given)
 */
function isDeclension(element?: Element | HTMLElement): boolean {
  if (!element) return false;

  const { id } = element;
  if (id.includes("Conjugation") || id.includes("Declension")) return true;

  for (
    let child = element.firstElementChild;
    child;
    child = child.nextElementSibling
  ) {
    if (isDeclension(child)) return true;
  }

  // Explicit result so callers always get a boolean, never undefined
  return false;
}
/**
 * Reports whether an element, or any element nested inside it, has an id
 * containing "References".
 *
 * @param element - root of the subtree to inspect; may be omitted
 * @returns `true` when such an id is found, `false` otherwise (including when
 *          no element is given)
 */
function isReferences(element?: Element | HTMLElement): boolean {
  if (!element) return false;
  if (element.id.includes("References")) return true;

  for (
    let child = element.firstElementChild;
    child;
    child = child.nextElementSibling
  ) {
    if (isReferences(child)) return true;
  }

  // Explicit result so callers always get a boolean, never undefined
  return false;
}
/**
 * Produces a stripped-down copy of an element tree.
 *
 * - a string becomes a text node;
 * - an `<a>` element is replaced by a text node holding its text content;
 * - `<sup>` elements and elements with the class "mw-editsection" or
 *   "nyms-toggle" are dropped;
 * - any other element is recreated from its tag name only (attributes are
 *   not copied) and its children are processed the same way.
 *
 * Child nodes that are neither text nor elements (for example HTML comments)
 * are skipped; they have no tag name and cannot be cloned by this routine.
 *
 * @param element - string or element to copy; may be omitted
 * @returns the new node, or `null` when the input is empty or dropped
 */
function safeTreeClone(element?: string | Element | HTMLElement) {
  if (!element) return null;
  if (typeof element === "string") return document.createTextNode(element);

  const tag = element.tagName.toLowerCase();
  if (tag === "a") return document.createTextNode(element.textContent || "");
  if (element.classList.contains("mw-editsection")) return null;
  if (element.classList.contains("nyms-toggle")) return null;
  if (tag === "sup") return null;

  // Numeric value of Node.ELEMENT_NODE
  const ELEMENT_NODE = 1;

  const result = document.createElement(element.tagName);
  for (const child of Array.from(element.childNodes)) {
    if (child.nodeName === "#text") {
      result.append(child.cloneNode());
    } else if (child.nodeType === ELEMENT_NODE) {
      const cloned = safeTreeClone(child as Element);
      if (cloned) result.append(cloned);
    }
    // Every other node type is intentionally left out of the copy
  }
  return result;
}

View File

@ -1,3 +1,5 @@
// Internal types
export type Category = {
name: string;
description: string;

View File

@ -1,5 +1,6 @@
import "../styles/globals.css";
import "../styles/components.css";
import "../styles/wiktionary.css";
import type { AppProps } from "next/app";
import { Navigation } from "../components";

View File

@ -37,6 +37,7 @@
.text-details {
color: darkslategray;
font-size: 15px;
line-height: 1.3rem;
}
/* Page */

36
src/styles/wiktionary.css Normal file
View File

@ -0,0 +1,36 @@
/* Styles for the Wiktionary excerpt, i.e. everything under an element with
   the class "wiktionary-content". */

/* NOTE(review): as written this matches the .wiktionary-content element
   itself when it is the first child of its parent. If the intent is to remove
   the top margin of the first element inside the excerpt, the selector would
   need to be ".wiktionary-content > :first-child" — confirm. */
.wiktionary-content:first-child {
margin-top: 0 !important;
}
/* Section headings: h3 and h4 share the same size and spacing. */
.wiktionary-content h3 {
font-size: 18px;
margin: 14px 0px;
}
.wiktionary-content h4 {
font-size: 18px;
margin: 14px 0px;
}
.wiktionary-content p {
margin: 10px 0px;
}
/* Explicit bold/italic declarations for inline emphasis. */
.wiktionary-content strong {
font-weight: bold;
}
.wiktionary-content i {
color: darkslateblue;
font-style: italic;
}
/* Lists: explicit markers and left padding. */
.wiktionary-content ol {
padding-left: 16px;
list-style: decimal;
}
.wiktionary-content ul {
padding-left: 16px;
list-style: disc;
}

View File

@ -1,5 +1,6 @@
import { useState } from "react";
import { ExpressionCard } from "../../components";
import { ExpressionDescription } from "../../components/ExpressionDescription/ExpressionDescription";
import { IndexedExpression, IndexedCategory } from "../../model";
import { DemoteExpressionButton } from "./DemoteExpressionButton";
import { PromoteExpressionButton } from "./PromoteExpressionButton";
@ -21,7 +22,7 @@ export function ExpressionPracticeCardView({
<ExpressionCard
prompt={expression.prompt}
categories={categories.map((category) => category.name)}
description={expression.description}
description={<ExpressionDescription expression={expression} />}
show_description={revealed}
/>
</section>