Richer card rendering using data from Wiktionary

Rendered HTML is pulled from Wiktionary instead of wiki markup
because expanding the markup's templates into user-readable
content would have required a lot of code.
This commit is contained in:
Thiago Chaves 2022-07-17 22:11:06 +03:00
parent d4b838106a
commit ec836a8465
12 changed files with 254 additions and 61 deletions

View File

@ -1,6 +1,11 @@
/** @type {import('next').NextConfig} */ /** @type {import('next').NextConfig} */
const nextConfig = { const nextConfig = {
reactStrictMode: true, reactStrictMode: true,
} experimental: {
images: {
unoptimized: true,
},
},
};
module.exports = nextConfig module.exports = nextConfig;

View File

@ -0,0 +1,11 @@
import { Expression } from "../../model";
/** Props accepted by the ExpressionDescription component. */
export interface ExpressionDescriptionProps {
/** Expression whose `description` string is injected as HTML. */
expression: Expression;
}
/**
 * Renders an expression's description inside a wrapper `<div>`.
 *
 * The description is inserted as raw HTML (not escaped), so it must already
 * be markup that is safe to inject into the page.
 */
export function ExpressionDescription(props: ExpressionDescriptionProps) {
  const markup = { __html: props.expression.description };
  return <div dangerouslySetInnerHTML={markup} />;
}

View File

@ -0,0 +1 @@
export * from "./ExpressionDescription";

6
src/mock/kivi.ts Normal file

File diff suppressed because one or more lines are too long

View File

@ -1,3 +1,4 @@
import { parseWiktionaryData } from "../model/parseWiktionaryData";
import { import {
Category, Category,
Expression, Expression,
@ -5,31 +6,26 @@ import {
ExpressionToCategory, ExpressionToCategory,
ExpressionToExpressionSet, ExpressionToExpressionSet,
} from "../model/types"; } from "../model/types";
import { KiviRaw } from "./kivi";
import { PuhuaRaw } from "./puhua";
interface RawExpressionDataItem { interface RawExpressionDataItem {
prompt: string; prompt: string;
description: string; data: string;
category: string;
expression_set: string;
} }
// Mock expressions. Each pair is a prompt plus the raw payload imported from
// the matching mock module; the pairs are mapped to { prompt, data } items.
const RawExpressionData: RawExpressionDataItem[] = [
["kivi", KiviRaw],
["puhua", PuhuaRaw],
].map(([prompt, data]) => ({
prompt,
data,
}));
interface RawExpressionSetItem { interface RawExpressionSetItem {
name: string; name: string;
description: string; description: string;
} }
const RawExpressionData: RawExpressionDataItem[] = [
["koira", "noun", "daily", "dog, dog paddle (swimming stroke)"],
["liite", "noun", "daily", "attachment, appendix, affix, supplement"],
["havaita", "verb", "daily", "to observe, to detect, to perceive"],
["tukea", "verb", "daily", "to support, finance, sponsor, substantiate"],
].map(([prompt, category, expression_set, description]) => ({
prompt,
description,
category,
expression_set,
}));
const RawExpressionSetData: RawExpressionSetItem[] = [ const RawExpressionSetData: RawExpressionSetItem[] = [
["daily", "New expressions and poorly remembered expressions"], ["daily", "New expressions and poorly remembered expressions"],
["weekly", "Expressions to be reviewed at the end of the week"], ["weekly", "Expressions to be reviewed at the end of the week"],
@ -38,6 +34,34 @@ const RawExpressionSetData: RawExpressionSetItem[] = [
["ancient", "Expressions here should be memorized by now"], ["ancient", "Expressions here should be memorized by now"],
].map(([name, description]) => ({ name, description })); ].map(([name, description]) => ({ name, description }));
// Category names handed to parseRawData as `raw_category_data`; each one
// becomes a Category whose description is the same as its name.
// NOTE(review): these look like Wiktionary part-of-speech headings — confirm.
const RawCategories: string[] = [
"noun",
"verb",
"adjective",
"adverb",
"determiner",
"article",
"preposition",
"conjunction",
"proper noun",
"letter",
"character",
"phrase",
"proverb",
"idiom",
"symbol",
"syllable",
"numeral",
"initialism",
"interjection",
"definitions",
"pronoun",
"particle",
"predicative",
"participle",
"suffix",
];
interface MockData { interface MockData {
// Tables // Tables
categories: Category[]; categories: Category[];
@ -49,25 +73,22 @@ interface MockData {
expression_to_expression_set: ExpressionToExpressionSet[]; expression_to_expression_set: ExpressionToExpressionSet[];
} }
export function parseRawData( interface ParseRawDataArgs {
raw_expression_data: RawExpressionDataItem[], raw_category_data: string[];
raw_expression_set_data: RawExpressionSetItem[] raw_expression_data: RawExpressionDataItem[];
): MockData { raw_expression_set_data: RawExpressionSetItem[];
const category_names = new Set( }
raw_expression_data.map((item) => item.category)
); export function parseRawData({
const categories: Category[] = Array.from(category_names).map((name) => ({ raw_category_data,
raw_expression_data,
raw_expression_set_data,
}: ParseRawDataArgs): MockData {
const categories: Category[] = raw_category_data.map((name) => ({
name, name,
description: name, description: name,
})); }));
const expressions: Expression[] = raw_expression_data.map(
({ prompt, description }) => ({
prompt,
description,
})
);
const expression_sets: ExpressionSet[] = raw_expression_set_data.map( const expression_sets: ExpressionSet[] = raw_expression_set_data.map(
({ name, description }) => ({ ({ name, description }) => ({
name, name,
@ -75,48 +96,61 @@ export function parseRawData(
}) })
); );
const expression_to_category: ExpressionToCategory[] = const expressions: Expression[] = [];
raw_expression_data.map((item, index) => const expression_to_category: ExpressionToCategory[] = [];
matchExpressionAndCategory(index + 1, item, categories) const expression_to_expression_set: ExpressionToExpressionSet[] = [];
); for (const { prompt, data } of raw_expression_data) {
console.log("Parsing", { prompt, data });
const expression = parseWiktionaryData(prompt, data);
if (!expression) continue;
const expression_to_expression_set: ExpressionToExpressionSet[] = expressions.push(expression);
raw_expression_data.map((item, index) => const expression_id = expressions.length;
matchExpressionAndExpressionSet(index + 1, item, expression_sets) /*
expression_to_category.push(
matchExpressionAndCategory({
expression_id,
category_name,
categories,
})
); );
*/
expression_to_expression_set.push({
expression_id,
expression_set_id: 1,
});
}
return { return {
categories,
expressions, expressions,
expression_sets, expression_sets,
categories,
expression_to_category, expression_to_category,
expression_to_expression_set, expression_to_expression_set,
}; };
} }
function matchExpressionAndCategory( interface MatchExpressionAndCategoryParams {
expression_id: number, expression_id: number;
{ category }: RawExpressionDataItem, category_name: string;
categories: Category[] categories: Category[];
): ExpressionToCategory { }
const category_id = categories.findIndex(({ name }) => name === category) + 1;
function matchExpressionAndCategory({
expression_id,
category_name,
categories,
}: MatchExpressionAndCategoryParams): ExpressionToCategory {
const category_id =
categories.findIndex(({ name }) => name === category_name) + 1;
return { return {
category_id, category_id,
expression_id, expression_id,
}; };
} }
function matchExpressionAndExpressionSet( export const MockData = parseRawData({
expression_id: number, raw_category_data: RawCategories,
{ expression_set }: RawExpressionDataItem, raw_expression_data: RawExpressionData,
expression_sets: ExpressionSet[] raw_expression_set_data: RawExpressionSetData,
): ExpressionToExpressionSet { });
const expression_set_id =
expression_sets.findIndex(({ name }) => name === expression_set) + 1;
return {
expression_id,
expression_set_id,
};
}
export const MockData = parseRawData(RawExpressionData, RawExpressionSetData);

5
src/mock/puhua.ts Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,90 @@
import { Expression } from "./types";
/**
 * Builds an {@link Expression} from a raw Wiktionary payload.
 *
 * `data` is a JSON string whose `parse.text["*"]` field holds rendered HTML.
 * The HTML is parsed, the section that follows the element with id "Finnish"
 * is located, unwanted elements are filtered out, and the remaining elements
 * are copied (through `safeTreeClone`) into a `div.wiktionary-content` whose
 * outer HTML becomes the expression's description.
 *
 * @param prompt - Text stored as the expression's prompt.
 * @param data - JSON string containing the rendered HTML.
 * @returns The expression, or `null` when running without a `window`
 *   (server-side rendering), when the payload carries no HTML string, or when
 *   the document has no "Finnish" section.
 * @throws SyntaxError when `data` is not valid JSON (browser only; the
 *   server-side guard returns before parsing).
 */
export function parseWiktionaryData(
  prompt: string,
  data: string
): Expression | null {
  // Prevent next.js SSR from crashing here: DOMParser is a browser-only API.
  if (typeof window === "undefined") return null;

  // JSON.parse yields untyped data, so confirm the HTML is really present
  // instead of dereferencing blindly and throwing on an unexpected shape.
  const payload: unknown = JSON.parse(data);
  const html = (
    payload as { parse?: { text?: Record<string, unknown> } } | null
  )?.parse?.text?.["*"];
  if (typeof html !== "string") return null;

  const parsed = new DOMParser().parseFromString(html, "text/html");

  // TODO settings-based language selection
  const header = parsed.getElementById("Finnish")?.parentElement;
  if (!header) return null;

  const content = parsed.createElement("div");
  content.className = "wiktionary-content";

  // Walk the siblings that follow the language header.
  for (
    let current = header.nextElementSibling;
    current;
    current = current.nextElementSibling
  ) {
    const tag = current.tagName.toLowerCase();

    // The wanted window ends at a rule, the next h2, or the references.
    if (tag === "hr" || tag === "h2" || isReferences(current)) break;

    // Filter out unwanted elements within the window.
    const unwanted =
      tag === "style" ||
      current.classList.contains("noprint") ||
      current.classList.contains("inflection-table") ||
      isDeclension(current);
    if (unwanted) continue;

    // safeTreeClone returns null for elements it drops. Passing null to
    // append() would insert the literal text "null", so skip it explicitly.
    const clone = safeTreeClone(current);
    if (clone) content.append(clone);
  }

  return {
    prompt,
    description: content.outerHTML,
  };
}
/**
 * Reports whether `element`, or any element nested inside it, has an `id`
 * containing "Conjugation" or "Declension".
 *
 * @param element - Root of the subtree to inspect; may be omitted.
 * @returns `true` on the first match, otherwise `false` (including when no
 *   element is given).
 */
function isDeclension(element?: Element | HTMLElement): boolean {
  if (!element) return false;
  if (element.id.includes("Conjugation") || element.id.includes("Declension")) {
    return true;
  }

  // Depth-first search through the element children.
  for (
    let child = element.firstElementChild;
    child;
    child = child.nextElementSibling
  ) {
    if (isDeclension(child)) return true;
  }

  // Always answer with a real boolean rather than falling off the end.
  return false;
}
/**
 * Reports whether `element`, or any element nested inside it, has an `id`
 * containing "References".
 *
 * @param element - Root of the subtree to inspect; may be omitted.
 * @returns `true` on the first match, otherwise `false` (including when no
 *   element is given).
 */
function isReferences(element?: Element | HTMLElement): boolean {
  if (!element) return false;
  if (element.id.includes("References")) return true;

  // Depth-first search through the element children.
  for (
    let child = element.firstElementChild;
    child;
    child = child.nextElementSibling
  ) {
    if (isReferences(child)) return true;
  }

  // Always answer with a real boolean rather than falling off the end.
  return false;
}
/**
 * Produces a stripped-down copy of an element tree.
 *
 * - A string becomes a text node.
 * - An `<a>` element is flattened to a text node holding its text content.
 * - `<sup>` elements and elements with the class `mw-editsection` or
 *   `nyms-toggle` are dropped.
 * - Any other element is recreated by tag name only (no attributes are
 *   copied) and its text and element children are cloned recursively.
 *
 * @param element - String or element to copy; may be omitted.
 * @returns The copy, or `null` when the input is empty or dropped.
 */
function safeTreeClone(
  element?: string | Element | HTMLElement
): Text | HTMLElement | null {
  if (!element) return null;
  if (typeof element === "string") return document.createTextNode(element);

  const tag = element.tagName.toLowerCase();
  if (tag === "a") return document.createTextNode(element.textContent || "");
  if (tag === "sup") return null;
  if (element.classList.contains("mw-editsection")) return null;
  if (element.classList.contains("nyms-toggle")) return null;

  // Numeric values of Node.ELEMENT_NODE and Node.TEXT_NODE.
  const ELEMENT_NODE = 1;
  const TEXT_NODE = 3;

  const result = document.createElement(element.tagName);
  for (const child of Array.from(element.childNodes)) {
    if (child.nodeType === TEXT_NODE) {
      result.append(child.cloneNode());
    } else if (child.nodeType === ELEMENT_NODE) {
      const cloned = safeTreeClone(child as Element);
      if (cloned) result.append(cloned);
    }
    // Other node kinds (e.g. comments) have no tagName or classList, so
    // they are skipped instead of being treated as elements.
  }
  return result;
}

View File

@ -1,3 +1,5 @@
// Internal types
export type Category = { export type Category = {
name: string; name: string;
description: string; description: string;

View File

@ -1,5 +1,6 @@
import "../styles/globals.css"; import "../styles/globals.css";
import "../styles/components.css"; import "../styles/components.css";
import "../styles/wiktionary.css";
import type { AppProps } from "next/app"; import type { AppProps } from "next/app";
import { Navigation } from "../components"; import { Navigation } from "../components";

View File

@ -37,6 +37,7 @@
.text-details { .text-details {
color: darkslategray; color: darkslategray;
font-size: 15px; font-size: 15px;
line-height: 1.3rem;
} }
/* Page */ /* Page */

36
src/styles/wiktionary.css Normal file
View File

@ -0,0 +1,36 @@
/* Styles for the HTML generated from Wiktionary content. Everything is
   scoped under the .wiktionary-content wrapper element. */

/* Remove the top margin of the first element inside the wrapper so the
   content starts flush with its container. The child combinator is needed:
   ".wiktionary-content:first-child" would target the wrapper itself. */
.wiktionary-content > :first-child {
  margin-top: 0 !important;
}

/* Section headings share one size and spacing. */
.wiktionary-content h3,
.wiktionary-content h4 {
  font-size: 18px;
  margin: 14px 0px;
}

.wiktionary-content p {
  margin: 10px 0px;
}

.wiktionary-content strong {
  font-weight: bold;
}

.wiktionary-content i {
  color: darkslateblue;
  font-style: italic;
}

/* Restore list markers and indentation for ordered and unordered lists. */
.wiktionary-content ol {
  padding-left: 16px;
  list-style: decimal;
}

.wiktionary-content ul {
  padding-left: 16px;
  list-style: disc;
}

View File

@ -1,5 +1,6 @@
import { useState } from "react"; import { useState } from "react";
import { ExpressionCard } from "../../components"; import { ExpressionCard } from "../../components";
import { ExpressionDescription } from "../../components/ExpressionDescription/ExpressionDescription";
import { IndexedExpression, IndexedCategory } from "../../model"; import { IndexedExpression, IndexedCategory } from "../../model";
import { DemoteExpressionButton } from "./DemoteExpressionButton"; import { DemoteExpressionButton } from "./DemoteExpressionButton";
import { PromoteExpressionButton } from "./PromoteExpressionButton"; import { PromoteExpressionButton } from "./PromoteExpressionButton";
@ -21,7 +22,7 @@ export function ExpressionPracticeCardView({
<ExpressionCard <ExpressionCard
prompt={expression.prompt} prompt={expression.prompt}
categories={categories.map((category) => category.name)} categories={categories.map((category) => category.name)}
description={expression.description} description={<ExpressionDescription expression={expression} />}
show_description={revealed} show_description={revealed}
/> />
</section> </section>