index(register) from an XML source
Hello list, please see this: ---------------------------------------- \startbuffer[test] <text> <div> <p>In this page there is a <name idref="id1">cat</name>, a <name idref="id2">dog</name> and a <name idref="id3">horse</name>.</p> </div> <div> <p>In this page there's only a <name idref="id2">fox terrier</name>.</p> </div> <div> <p>In this page you find a <name idref="id1">persian cat</name> and a <name idref="id3">horse</name>.</p> </div> <index> <entry id="id1" data-sort="cat">Cat</entry> <entry id="id2" data-sort="dog">Dog</entry> <entry id="id3" data-sort="horse">Horse</entry> </index> </text> \stopbuffer \defineregister[Animal][Animals] \startxmlsetups xml:mysetup \xmlsetsetup{#1}{text|div|p|name|i|index|entry}{xml:*} \stopxmlsetups \xmlregistersetup{xml:mysetup} \startxmlsetups xml:text \xmlflush{#1} \stopxmlsetups \startxmlsetups xml:div \xmlflush{#1}\page \stopxmlsetups \startxmlsetups xml:p \xmlflush{#1}\par \stopxmlsetups \startxmlsetups xml:name \Animal[\xmlfunction{#1}{sortKeyOfAnimal}]{\xmlfunction{#1}{indexEntryF orAnimal}}{\bf\xmlflush{#1}} \stopxmlsetups \startxmlsetups xml:i \dontleavehmode{\em\xmlflush{#1}} \stopxmlsetups \startxmlsetups xml:index \placeregister[Animal] \stopxmlsetups \startxmlsetups xml:entry \xmlflush{#1} \stopxmlsetups \startluacode userdata = userdata or {} userdata.animals = {} function userdata.findAnimal(t) local idref = xml.attribute(t, "", "idref") local animal = userdata.animals[idref] if not animal then local entry = xml.first(t, "ancestor::text//index/entry[@id = '" .. idref .. 
"']") local sortkey = xml.attribute(entry, "", "data-sort") animal = {} if sortkey and entry then animal.id = idref animal.sortkey = sortkey animal.entry = entry userdata.animals[idref] = animal end end return animal end function xml.functions.sortKeyOfAnimal(t) local animal = userdata.findAnimal(t) if animal.sortkey then context(animal.sortkey) else context("zzz") end end function xml.functions.indexEntryForAnimal(t) local animal = userdata.findAnimal(t) if animal.entry then lxml.flush(animal.entry) end end \stopluacode \starttext \xmlprocessbuffer{main}{test}{} \stoptext ---------------------------------------- It's a simplified version of the documents I'm working on: - it's XML - the index entries are already defined - the entries of the index have an "id" attribute and they are referred in the main text with an "idref" attribute - \Animal[sortkey]{entry} is used to index the terms in the text - "sortkey" and "entry" are calculated by the Lua code, that reads the idref and finds the right entry inside the <index> element (it also caches the entries) Context has no problems with that code, but the index I get (page 4) is something like this: c Cat 1 Cat 3 d Dog 1 Dog 2 h Horse 1 Horse 3 What I'd expect is instead: c Cat 1,3 d Dog 1,2 h Horse 1,3 Could you explain me why everytime I get a distinct entry in the index, even when the sort key and the entry are the same? (or I only think they are, but they are not really) Thanks in advance, Massi
On 1/13/2017 6:10 PM, MF wrote:
Hello list, please see this: ---------------------------------------- \startbuffer[test] <text> <div> <p>In this page there is a <name idref="id1">cat</name>, a <name idref="id2">dog</name> and a <name idref="id3">horse</name>.</p> </div> <div> <p>In this page there's only a <name idref="id2">fox terrier</name>.</p> </div> <div> <p>In this page you find a <name idref="id1">persian cat</name> and a <name idref="id3">horse</name>.</p> </div> <index> <entry id="id1" data-sort="cat">Cat</entry> <entry id="id2" data-sort="dog">Dog</entry> <entry id="id3" data-sort="horse">Horse</entry> </index> </text> \stopbuffer
\defineregister[Animal][Animals]
\startxmlsetups xml:mysetup \xmlsetsetup{#1}{text|div|p|name|i|index|entry}{xml:*} \stopxmlsetups
\xmlregistersetup{xml:mysetup}
\startxmlsetups xml:text \xmlflush{#1} \stopxmlsetups
\startxmlsetups xml:div \xmlflush{#1}\page \stopxmlsetups
\startxmlsetups xml:p \xmlflush{#1}\par \stopxmlsetups
\startxmlsetups xml:name
\Animal[\xmlfunction{#1}{sortKeyOfAnimal}]{\xmlfunction{#1}{indexEntryForAnimal}}{\bf\xmlflush{#1}} \stopxmlsetups
\startxmlsetups xml:i \dontleavehmode{\em\xmlflush{#1}} \stopxmlsetups
\startxmlsetups xml:index \placeregister[Animal] \stopxmlsetups
\startxmlsetups xml:entry \xmlflush{#1} \stopxmlsetups
\startluacode userdata = userdata or {} userdata.animals = {}
function userdata.findAnimal(t) local idref = xml.attribute(t, "", "idref") local animal = userdata.animals[idref] if not animal then local entry = xml.first(t, "ancestor::text//index/entry[@id = '" .. idref .. "']") local sortkey = xml.attribute(entry, "", "data-sort") animal = {} if sortkey and entry then animal.id = idref animal.sortkey = sortkey animal.entry = entry userdata.animals[idref] = animal end end return animal end
function xml.functions.sortKeyOfAnimal(t) local animal = userdata.findAnimal(t) if animal.sortkey then context(animal.sortkey) else context("zzz") end end
function xml.functions.indexEntryForAnimal(t) local animal = userdata.findAnimal(t) if animal.entry then lxml.flush(animal.entry) end end \stopluacode
\starttext \xmlprocessbuffer{main}{test}{} \stoptext ----------------------------------------
It's a simplified version of the documents I'm working on: - it's XML - the index entries are already defined - the entries of the index have an "id" attribute and they are referred to in the main text with an "idref" attribute - \Animal[sortkey]{entry} is used to index the terms in the text - "sortkey" and "entry" are calculated by the Lua code, which reads the idref and finds the right entry inside the <index> element (it also caches the entries)
Context has no problems with that code, but the index I get (page 4) is something like this:
c Cat 1 Cat 3
d Dog 1 Dog 2
h Horse 1 Horse 3
What I'd expect is instead:
c Cat 1,3
d Dog 1,2
h Horse 1,3
Could you explain to me why I get a distinct entry in the index every time, even when the sort key and the entry are the same? (Or perhaps I only think they are the same, but they really are not.)
Take a look at the tuc file ... ["list"]={ { "\\xmlfunction {main::6}{indexEntryForAnimal}", "dog" }, }, ["list"]={ { "\\xmlfunction {main::10}{indexEntryForAnimal}", "dog" }, }, so although the key is the same the entry isn't and we can't know which one to choose so we consider them variants (one can for instance have bold and slanted keys at the same time) you can wrap the entry in \expanded or nicer is: \setupregister[expansion=yes] but if your entries contain markup you might like this more: \startbuffer[test] <text> <div> <p>In this page there is a <name idref="id1">cat</name>, a <name idref="id2">dog</name> and a <name idref="id3">horse</name>.</p> </div> <div> <p>In this page there's only a <name idref="id2">fox terrier</name>.</p> </div> <div> <p>In this page you find a <name idref="id1">persian cat</name> and a <name idref="id3">horse</name>.</p> </div> <index> <entry id="id1" data-sort="cat">Cat</entry> <entry id="id2" data-sort="dog"><bf>Dog</bf></entry> <entry id="id3" data-sort="horse">Horse</entry> </index> </text> \stopbuffer \defineregister[Animal][Animals] \startxmlsetups xml:mysetup \xmlsetsetup{#1}{*}{xml:*} \stopxmlsetups \xmlregistersetup{xml:mysetup} \startxmlsetups xml:text \xmlfunction{#1}{collectIndex} \xmlflush{#1} \stopxmlsetups \startxmlsetups xml:bf \dontleavehmode {\xmlflush{#1}} \stopxmlsetups \startxmlsetups xml:div \xmlflush{#1} \page \stopxmlsetups \startxmlsetups xml:p \ignorespaces \xmlflush{#1} \removeunwantedspaces \par \stopxmlsetups \startxmlsetups xml:name \dontleavehmode \xmlfunction{#1}{indexEntry} {\bf\xmlflush{#1}} \stopxmlsetups \startxmlsetups xml:i \dontleavehmode {\em\xmlflush{#1}} \stopxmlsetups \startxmlsetups xml:index \placeregister[Animal] \stopxmlsetups \startxmlsetups xml:entry \xmlflush{#1} \stopxmlsetups \startluacode local animals = { } function xml.functions.collectIndex(e) for entry in xml.collected(e,"/index/entry") do local key = entry.at["data-sort"] local id = entry.at["id"] if key and id then 
animals[id] = { key = key, entry = entry, } end end end function xml.functions.indexEntry(e) local animal = animals[e.at.idref] if animal then context.Animal ( { animal.key }, "\\xmlflush{main::" .. animal.entry.ix .. "}" ) end end \stopluacode \starttext \xmlprocessbuffer{main}{test}{} \stoptext These #1 are pointers to nodes, so this way (via ix) you access the entry itself. Also watch how we collect all entries at the start. Hans ----------------------------------------------------------------- Hans Hagen | PRAGMA ADE Ridderstraat 27 | 8061 GH Hasselt | The Netherlands tel: 038 477 53 69 | www.pragma-ade.nl | www.pragma-pod.nl -----------------------------------------------------------------
Thank you, Hans. I had found a solution, then I saw your e-mail. Anyway I've learned a lot from it. My solution was something like this: function xml.functions.indexEntry(e) local animal = animals[e.at.idref] if animal then --[[ context.Animal ( { animal.key }, "\\xmlflush{main::" .. animal.entry.ix .. "}" ) --]] context.Animal( { animal.key } ) context.bgroup() lxml.flush(animal.entry) context.egroup() end end The PDF output is right. Is it really the same? * * * Now an alternative approach I thought, because the index I'm working on is a biographical index, I mean a index of cited people, with a biographical profile. So every entry is a few lines long. That's why I thought to index only the ids in the text and use a textcommand in the index to pass from the id to the real entry. Suppose to use \setupregister[Animal][textcommand=\fromIdToEntry] and an indexEntry function modified like this: function xml.functions.indexEntry(e) local animal = animals[e.at.idref] if animal then context.Animal( { animal.key }, e.at.idref ) end end Without a textcommand, you would have an index of the ids. The textcommand \fromIdToEntry should take the id of an index entry and typeset the entry content. \def\fromIdToEntry#1{ ... } #1 is the id, but it's not a string. Is there a way to use it as a string to look for the entry? Is this approach better? Thank you, best regards, Massi
On Sat, 14 Jan 2017 15:52:25 +0100
MF
Now an alternative approach I thought of, because the index I'm working on is a biographical index, that is, an index of cited people, each with a biographical profile.
One can create an index with the publications module as follows: \defineregister [indexofauthors] \definebtrxregister [authors] [field=author, register=indexofauthors, method=always, dataset=default, alternative=invertedshort] ... \placeregister [indexofauthors] [compress=yes] This will create an index of all \cited authors. Note that one can index any field, for example title or keywords would be useful. I have not thought about how to add a biographical profile. Alan
On 1/14/2017 3:52 PM, MF wrote:
Thank you, Hans. I had found a solution, then I saw your e-mail. Anyway I've learned a lot from it. My solution was something like this:
function xml.functions.indexEntry(e) local animal = animals[e.at.idref] if animal then --[[ context.Animal ( { animal.key }, "\\xmlflush{main::" .. animal.entry.ix .. "}" ) --]] context.Animal( { animal.key } ) context.bgroup() lxml.flush(animal.entry) context.egroup() end end
The PDF output is right. Is it really the same?
no, but the effect can be the same (just compare the tuc files)
Now an alternative approach I thought of, because the index I'm working on is a biographical index, that is, an index of cited people, each with a biographical profile. So every entry is a few lines long. That's why I thought of indexing only the ids in the text and using a textcommand in the index to get from the id to the real entry.
Suppose we use \setupregister[Animal][textcommand=\fromIdToEntry] and an indexEntry function modified like this:
function xml.functions.indexEntry(e) local animal = animals[e.at.idref] if animal then context.Animal( { animal.key }, e.at.idref ) end end
Without a textcommand, you would have an index of the ids. The textcommand \fromIdToEntry should take the id of an index entry and typeset the entry content.
\def\fromIdToEntry#1{ ... }
#1 is the id, but it's not a string. Is there a way to use it as a string to look for the entry? Is this approach better?
comparable btw, you can better use deeptextcommand as textcommand gets something more passed ----------------------------------------------------------------- Hans Hagen | PRAGMA ADE Ridderstraat 27 | 8061 GH Hasselt | The Netherlands tel: 038 477 53 69 | www.pragma-ade.nl | www.pragma-pod.nl -----------------------------------------------------------------
Now an alternative approach I thought of, because the index I'm working on is a biographical index, that is, an index of cited people, each with a biographical profile. So every entry is a few lines long. That's why I thought of indexing only the ids in the text and using a textcommand in the index to get from the id to the real entry.
Suppose we use \setupregister[Animal][textcommand=\fromIdToEntry] and an indexEntry function modified like this:
function xml.functions.indexEntry(e) local animal = animals[e.at.idref] if animal then context.Animal( { animal.key }, e.at.idref ) end end
Without a textcommand, you would have an index of the ids. The textcommand \fromIdToEntry should take the id of an index entry and typeset the entry content.
\def\fromIdToEntry#1{ ... }
#1 is the id, but it's not a string. Is there a way to use it as a string to look for the entry? Is this approach better?
comparable
btw, it is better to use deeptextcommand, as textcommand gets something more passed to it
I think I've understood (thanks Hans): \startbuffer[test] <text> <div> <p>In this page there is a <name idref="id1">cat</name>, a <name idref="id2">dog</name> and a <name idref="id3">horse</name>.</p> </div> <div> <p>In this page there's only a <name idref="id2">fox terrier</name>.</p> </div> <div> <p>In this page you find a <name idref="id1">persian cat</name> and a <name idref="id3">horse</name>.</p> </div> <index> <entry id="id1" data-sort="cat">Cat</entry> <entry id="id2" data-sort="dog"><bf>Dog</bf></entry> <entry id="id3" data-sort="horse">Horse</entry> </index> </text> \stopbuffer \defineregister[Animal][Animals] \setupregister[Animal][deeptextcommand=\idToEntry] \def\idToEntry#1{\ctxlua{id2entry([==[#1]==])}} \startxmlsetups xml:mysetup \xmlsetsetup{#1}{*}{xml:*} \stopxmlsetups \xmlregistersetup{xml:mysetup} \startxmlsetups xml:text \xmlfunction{#1}{collectIndex} \xmlflush{#1} \stopxmlsetups \startxmlsetups xml:bf \dontleavehmode {\xmlflush{#1}} \stopxmlsetups \startxmlsetups xml:div \xmlflush{#1} \page \stopxmlsetups \startxmlsetups xml:p \ignorespaces \xmlflush{#1} \removeunwantedspaces \par \stopxmlsetups \startxmlsetups xml:name \dontleavehmode \xmlfunction{#1}{indexEntry} {\bf\xmlflush{#1}} \stopxmlsetups \startxmlsetups xml:i \dontleavehmode {\em\xmlflush{#1}} \stopxmlsetups \startxmlsetups xml:index \placeregister[Animal] \stopxmlsetups \startxmlsetups xml:entry \xmlflush{#1} \stopxmlsetups \startluacode local animals = { } function xml.functions.collectIndex(e) for entry in xml.collected(e,"/index/entry") do local key = entry.at["data-sort"] local id = entry.at["id"] if key and id then animals[id] = { key = key, entry = entry, } end end end function xml.functions.indexEntry(e) local animal = animals[e.at.idref] if animal then context.Animal( { animal.key }, e.at.idref ) end end function id2entry(id) if animals[id] then lxml.flush(animals[id].entry) end end \stopluacode \starttext \xmlprocessbuffer{main}{test}{} \stoptext
participants (3)
-
Alan Braslau
-
Hans Hagen
-
MF