From 3f7f5aa184ea55b77a327ae9f5c87fafcd6de0d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Sko=C5=99epa?= <jakub@skorepa.info> Date: Thu, 22 Sep 2016 20:36:52 +0200 Subject: [PATCH] Finish markdown parser and add tag collector --- lib/collectTags.js | 57 +++++++++++++++++++++++++++++ lib/index.js | 43 +++++++++++++++++----- lib/markdownToJSON/flatten.js | 67 ++++++++++++++++++++++++++++++++++ lib/markdownToJSON/index.js | 4 +- lib/markdownToJSON/renderer.js | 2 +- 5 files changed, 162 insertions(+), 11 deletions(-) create mode 100644 lib/collectTags.js create mode 100644 lib/markdownToJSON/flatten.js diff --git a/lib/collectTags.js b/lib/collectTags.js new file mode 100644 index 0000000..9c6cc15 --- /dev/null +++ b/lib/collectTags.js @@ -0,0 +1,57 @@ +import _ from 'lodash/fp' + +const collector = ({ pagination, warnings = {} }) => files => { + const { onlyOneArticle = false, sameURL = true } = warnings + const tags = {} + const tagList = [] + let warned = false + files.forEach(file => { + if(file.metadata.tags) + for(const tag of file.metadata.tags) { + const tagname = _.flow( + _.deburr, + _.toLower, + _.replace(' ','-'), + _.replace(/[^a-zA-Z-0-9]+/g, ''), + )(tag) + if(!tags[tagname]) { + const obj = { deburr: tagname, tag, list: [] } + tags[tagname] = obj + tagList.push(obj) + } + tags[tagname].list.push(file) + const warn = () => { if(!warned) { console.log(); warned = true } } + if(sameURL && tags[tagname].tag !== tag) { + warn() + console.error(`WARNING: Name of tag "${tag}" is not same as "${tags[tagname].tag}" but they correspond to same url of "${tagname}"`) + console.error(` In files ${file.metadata.filename} and ${tags[tagname].list[0].metadata.filename}`) + } + if(onlyOneArticle && tags[tagname].list.length < 2) { + warn() + console.error(`WARNING: Tag "${tag}" only has one article. Consider removing it or adding it more articles.`) + console.error(` In file ${file.metadata.filename}`) + } + } + }) + const tagPages = tagList.reduce((list, tag) => { + const chunks = _.chunk(pagination, tag.list) + const paginator = chunks.map((chunk, index) => '/tag/'+tag.deburr+(index?'/'+(index+1):'')) + return list.concat(chunks.map( + (chunk, index) => ({ + type: 'tag', + metadata: { + url: paginator[index], + name: tag.tag, + paginator, + }, + content: chunk + }) + )) + }, []) + return files.map( + article => ({ ...article, type: 'article' }) + ).concat(tagPages) + //console.log(tagList) +} + +export default collector diff --git a/lib/index.js b/lib/index.js index 00b1067..c1a6fe2 100644 --- a/lib/index.js +++ b/lib/index.js @@ -4,16 +4,41 @@ import fs from 'fs' import {readFiles} from './readFiles' import parser from './parser' import markdownToJSON from './markdownToJSON' +import collectTags from './collectTags' +const buildStep = (desc, func) => f => { + const start = new Date(); + const msg = `- ${desc}` + process.stdout.write(msg) + const ret = func(f) + process.stdout.write(_.repeat(30-msg.length,' ')) + process.stdout.write(`${(new Date()-start)}ms\n`) + return ret +} + +const map = (desc, func) => buildStep(desc, f => f.map(func)) + +let start readFiles({ filter: file => !/\.git$/.exec(file) -})(change => console.log()) -.then(files => files.map(f => parser(f))) -.then(files => files.filter(f => f)) -.then(files => files.map(f => ({ metadata: f.metadata, content: markdownToJSON(f.content) }))) -.then(files => files.map(f => { - console.log(_.repeat(80,'=')) - console.log(f.metadata) - console.log(f.content) -})) +})(change => console.log(change)) +.then(f => (start = new Date()) && f) +.then(map('Parsing files', f => parser(f))) +.then(map('Parsing markdown', f => ({ metadata: f.metadata, content: markdownToJSON(f.content) }))) +.then(map('Rewriting URLs', f => ({ + metadata: { + ...f.metadata, + url: '/clanek/' + f.metadata.filename.replace(/\.md$/,'') + }, + content: f.content +}))) +.then(buildStep('Collect tags', collectTags({ + pagination: 6, + warnings: { + onlyOneArticle: false, + sameURL: false, + } +}))) +//.then(f => console.log(f)) +.then(() => console.log(` Total: ${(new Date())-start}ms`)) .catch(e => console.log(e)) diff --git a/lib/markdownToJSON/flatten.js b/lib/markdownToJSON/flatten.js new file mode 100644 index 0000000..0ba8ece --- /dev/null +++ b/lib/markdownToJSON/flatten.js @@ -0,0 +1,67 @@ +import _ from 'lodash/fp' + +const join = (a, b) => { + const anmap = an => Object.assign({}, an, {from: an.from+a.text.length}) + const anotations = { ...a.anotations } + for(const anotation in b.anotations) { + if(anotations[anotation]) { // merge + anotations[anotation] = [ + ...a.anotations[anotation], + ...b.anotations[anotation].map(anmap) + ] + } else { + anotations[anotation] = b.anotations[anotation].map(anmap) + } + } + return { + text: a.text+b.text, + anotations + } +} + +const mergeAnotations = (a, b) => { + throw new Error('Not implemented'); +} + +const toAnotatedText = (par) => { + if(!par) { + return { text: '', anotations: {} } + } + + if(_.isString(par)) { + return { text: par, anotations: {} } + } else if(_.isArray(par)) { + return par.map(toAnotatedText).reduce(join) + return { text: 'TODO', anotations: {} } + } else { // object + const {name, opts} = par + const ant = toAnotatedText(par.children) + const anotation = { from: 0, length: ant.text.length, opts } + const ret = { text: ant.text, anotations: { ...ant.anotations }} + ret.anotations[name] = [ + ...(ant.anotations[name] || []), + anotation, + ] + return ret + } + console.log(par) + throw new Error('Panic') +} + +const mkmerger = merger => arg => { + for(const i in merger) { + + } +} + +export default a => { + //console.log(_.repeat(80,'=')) + //console.log(a) + const ret = mkmerger({ + + })( + toAnotatedText(a.filter(o => o)) + ) + //console.log(JSON.stringify(ret, undefined, ' ')) + return ret +} diff --git a/lib/markdownToJSON/index.js b/lib/markdownToJSON/index.js index adcaa54..c45e282 100644 --- a/lib/markdownToJSON/index.js +++ b/lib/markdownToJSON/index.js @@ -2,10 +2,12 @@ import _ from 'lodash/fp' import {toStr} from './renderer' import Parser from './parser' +import flatten from './flatten' import kramed from 'kramed' const mtj = new Parser() export default _.flow( a => kramed.lexer(a), - a => mtj.parse(a) + a => mtj.parse(a), + flatten ) diff --git a/lib/markdownToJSON/renderer.js b/lib/markdownToJSON/renderer.js index 927c8d9..26e03a1 100644 --- a/lib/markdownToJSON/renderer.js +++ b/lib/markdownToJSON/renderer.js @@ -82,7 +82,7 @@ Renderer.prototype.listitem = simple('li') Renderer.prototype.paragraph = simple('p') Renderer.prototype.tablerow = simple('tr') -Renderer.prototype.table = (header, body) => fmt('table', body, {header}) +Renderer.prototype.table = (header, body) => fmt('table', [fmt('thead', header), fmt('tbody', body)]) Renderer.prototype.list = function(body, ordered) { var type = ordered ? 'ol' : 'ul'; -- GitLab