[ksh93-integration-discuss] Tree variable?
Roland Mainz
roland.mainz at nrubsig.org
Fri Aug 31 23:59:28 PDT 2007
Robert Neville wrote:
>
> While combing through the list archives I found the term 'tree
> variable' multiple times which is not documented in the ksh manual
> page.
AFAIK I used that term multiple times while thinking about compound
variables. Compound variables can be used to build a "variable tree"
of unlimited depth (or better: limited only by memory size...) to store
structured data.
> Is this an undocumented feature or am I just not looking hard
> enough? How do I use tree variables in ksh?
... see David Korn's explanation at
http://mail.opensolaris.org/pipermail/ksh93-integration-discuss/2007-August/005622.html
for a simple usage example. I've attached a more complex demo
("xmldocumenttree1.ksh.txt") to this email which converts the following
XML document...
-- snip --
<br />
<score-partwise instrument="flute1">
<identification>
<kaiman>nocrocodile</kaiman>
</identification>
<partlist>
<foo>myfootext</foo>
<bar>mybartext</bar>
<snap />
<ttt>myttttext</ttt>
</partlist>
</score-partwise>
-- snip --
to this DOM-like (see
http://en.wikipedia.org/wiki/Document_Object_Model) variable tree:
-- snip --
xdoc.nodes[0].tagattributes=''
xdoc.nodes[0].tagname=br
xdoc.nodes[0].tagtype=element
xdoc.nodes[1].tagtype=text
xdoc.nodes[1].tagvalue=$'\n'
xdoc.nodes[2].nodes[0].tagtype=text
xdoc.nodes[2].nodes[0].tagvalue=$'\n '
xdoc.nodes[2].nodes[1].nodes[0].tagtype=text
xdoc.nodes[2].nodes[1].nodes[0].tagvalue=$'\n\t '
xdoc.nodes[2].nodes[1].nodes[1].nodes[0].tagtype=text
xdoc.nodes[2].nodes[1].nodes[1].nodes[0].tagvalue=nocrocodile
xdoc.nodes[2].nodes[1].nodes[1].tagattributes=''
xdoc.nodes[2].nodes[1].nodes[1].tagname=kaiman
xdoc.nodes[2].nodes[1].nodes[1].tagtype=element
xdoc.nodes[2].nodes[1].nodes[2].tagtype=text
xdoc.nodes[2].nodes[1].nodes[2].tagvalue=$'\n '
xdoc.nodes[2].nodes[1].tagattributes=''
xdoc.nodes[2].nodes[1].tagname=identification
xdoc.nodes[2].nodes[1].tagtype=element
xdoc.nodes[2].nodes[2].tagtype=text
xdoc.nodes[2].nodes[2].tagvalue=$'\n '
xdoc.nodes[2].nodes[3].nodes[0].tagtype=text
xdoc.nodes[2].nodes[3].nodes[0].tagvalue=$'\n\t '
xdoc.nodes[2].nodes[3].nodes[1].nodes[0].tagtype=text
xdoc.nodes[2].nodes[3].nodes[1].nodes[0].tagvalue=myfootext
xdoc.nodes[2].nodes[3].nodes[1].tagattributes=''
xdoc.nodes[2].nodes[3].nodes[1].tagname=foo
xdoc.nodes[2].nodes[3].nodes[1].tagtype=element
xdoc.nodes[2].nodes[3].nodes[2].tagtype=text
xdoc.nodes[2].nodes[3].nodes[2].tagvalue=$'\n\t '
xdoc.nodes[2].nodes[3].nodes[3].nodes[0].tagtype=text
xdoc.nodes[2].nodes[3].nodes[3].nodes[0].tagvalue=mybartext
xdoc.nodes[2].nodes[3].nodes[3].tagattributes=''
xdoc.nodes[2].nodes[3].nodes[3].tagname=bar
xdoc.nodes[2].nodes[3].nodes[3].tagtype=element
xdoc.nodes[2].nodes[3].nodes[4].tagtype=text
xdoc.nodes[2].nodes[3].nodes[4].tagvalue=$'\n\t '
xdoc.nodes[2].nodes[3].nodes[5].tagattributes=''
xdoc.nodes[2].nodes[3].nodes[5].tagname=snap
xdoc.nodes[2].nodes[3].nodes[5].tagtype=element
xdoc.nodes[2].nodes[3].nodes[6].tagtype=text
xdoc.nodes[2].nodes[3].nodes[6].tagvalue=$'\n\t '
xdoc.nodes[2].nodes[3].nodes[7].nodes[0].tagtype=text
xdoc.nodes[2].nodes[3].nodes[7].nodes[0].tagvalue=myttttext
xdoc.nodes[2].nodes[3].nodes[7].tagattributes=''
xdoc.nodes[2].nodes[3].nodes[7].tagname=ttt
xdoc.nodes[2].nodes[3].nodes[7].tagtype=element
xdoc.nodes[2].nodes[3].nodes[8].tagtype=text
xdoc.nodes[2].nodes[3].nodes[8].tagvalue=$'\n '
xdoc.nodes[2].nodes[3].tagattributes=''
xdoc.nodes[2].nodes[3].tagname=partlist
xdoc.nodes[2].nodes[3].tagtype=element
xdoc.nodes[2].nodes[4].tagtype=text
xdoc.nodes[2].nodes[4].tagvalue=$'\n'
xdoc.nodes[2].tagattributes=instrument='"flute1"'
xdoc.nodes[2].tagname=score-partwise
xdoc.nodes[2].tagtype=element
-- snip --
, i.e. each XML element is converted into a compound variable with the
members "tagname", "tagtype" (type of node, e.g. "element" for normal
elements and "text" for text nodes), "tagattributes" (string which
contains a list of attributes) and "nodes" (an array of child nodes).
Text nodes carry their content in the member "tagvalue" instead.
----
Bye,
Roland
--
__ . . __
(o.\ \/ /.o) roland.mainz at nrubsig.org
\__\/\/__/ MPEG specialist, C&&JAVA&&Sun&&Unix programmer
/O /==\ O\ TEL +49 641 7950090
(;O/ \/ \O;)
-------------- next part --------------
#!/bin/ksh93
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
#
# The Solaris tools in /usr/bin are not POSIX-conformant, so the XPG4
# versions in /usr/xpg4/bin must come first on the search path.
PATH=/usr/xpg4/bin:/bin:/usr/bin
export PATH
#
# fatal_error - print an error message on stderr and terminate the script.
#
# Arguments: $@ - the words of the message; they are joined into a single
#                 string using the first character of IFS (a space by
#                 default).
# Globals:   progname (read) - used as prefix of the message.
# Exit:      never returns; the shell exits with status 1.
#
function fatal_error
{
    # "$*" keeps the message one word ("$@" embedded in a longer string
    # would be split into several arguments); -r and -- make "print"
    # emit the text verbatim even if it contains backslashes or starts
    # with a dash.
    print -u2 -r -- "${progname}: $*"
    exit 1
}
#
# handle_document - xml_tok callback which builds the "xdoc" variable tree.
#
# Arguments (as passed by xml_tok):
#   $1 - name of the callback array
#   $2 - event type: tag_begin, tag_end, tag_text, document_start or
#        document_end
#   $3 - tag name (tag_begin/tag_end) or text content (tag_text)
#   $4 - attribute string (tag_begin only)
#
# Globals:
#   xdoc  - the tree being built (reached through the local nameref "doc")
#   stack - stack.items[stack.pos] holds the variable path of the node
#           array which currently receives new nodes; stack.pos is
#           incremented on tag_begin and decremented on tag_end
#
function handle_document
{
#set -o xtrace
# "doc" aliases the global tree; the paths stored in stack.items start
# with "doc." and are therefore resolved through this nameref
nameref doc=xdoc
# node array currently being filled ...
nameref nodepath="${stack.items[stack.pos]}"
# ... and its element counter (same path with a "num" suffix)
nameref nodesnum="${stack.items[stack.pos]}num"
# NOTE(review): "callbacks" is not referenced anywhere else in this function
nameref callbacks=${1}
typeset tag_type="$2"
typeset tag_value="$3"
typeset tag_attributes="$4"
case "${tag_type}" in
tag_begin)
# append a new element node, including an empty child array and
# the matching child counter
nodepath[${nodesnum}]+=(
typeset tagtype="element"
typeset tagname="${tag_value}"
typeset tagattributes="${tag_attributes}"
typeset -A nodes=( )
integer nodesnum=0
)
# descend: following nodes are stored in the new element's "nodes" array.
# The path is recorded using the not-yet-incremented counter, i.e. the
# index of the node appended above.
(( stack.pos++ ))
stack.items[stack.pos]="${stack.items[stack.pos-1]}[${nodesnum}].nodes"
(( nodesnum++ ))
;;
tag_end)
# ascend to the parent's node array
(( stack.pos-- ))
;;
tag_text)
# append a text node to the current node array
nodepath[${nodesnum}]+=(
typeset tagtype="text"
typeset tagvalue="${tag_value}"
)
(( nodesnum++ ))
;;
document_start)
# nothing to do
;;
document_end)
# nothing to do
;;
esac
# print "xmltok: '${tag_type}' = '${tag_value}'"
}
#
# xml_tok - minimal streaming XML tokenizer.
#
# Reads a document from standard input one character at a time and reports
# what it finds through callbacks.
#
# Arguments:
#   $1 - name of an associative array which maps event names to the command
#        to run for that event. Keys used: "document_start", "document_end",
#        "tag_begin", "tag_end" and "tag_text". An event whose entry is
#        empty is skipped.
#
# Each callback is invoked as
#   <callback> <array name> <event name> [<tag name or text> [<attributes>]]
# The "document_end" callback receives the string "exit_success" as third
# argument.
#
# Output: one newline on stdout after the input was consumed.
#
function xml_tok
{
    typeset buf=""
    typeset namebuf=""
    typeset attrbuf=""
    typeset c=""
    typeset isendtag # bool: true/false
    typeset issingletag # bool: true/false (used for tags like "<br />")
    nameref callbacks=${1}

    [[ -n "${callbacks["document_start"]}" ]] && ${callbacks["document_start"]} "${1}" "document_start"

    while IFS='' read -r -N 1 c ; do
        isendtag=false

        if [[ "$c" = "<" ]] ; then
            # flush any text content collected since the previous tag
            if [[ "$buf" != "" ]] ; then
                [[ -n "${callbacks["tag_text"]}" ]] && ${callbacks["tag_text"]} "${1}" "tag_text" "$buf"
                buf=""
            fi

            # the character after "<" decides between start- and end-tag
            IFS='' read -r -N 1 c
            if [[ "$c" = "/" ]] ; then
                isendtag=true
            else
                buf="$c"
            fi

            # read the remainder of the tag up to (excluding) the closing ">"
            IFS='' read -r -d '>' c
            buf+="$c"

            # check if the tag starts and ends at the same time (like "<br />")
            if [[ "${buf}" = ~(Er).*/ ]] ; then
                issingletag=true
                buf="${buf%*/}"
            else
                issingletag=false
            fi

            # check if the tag has attributes (e.g. space after name)
            if [[ "$buf" = ~(E)[[:space:][:blank:]] ]] ; then
                namebuf="${buf%%~(E)[[:space:][:blank:]].*}"
                attrbuf="${buf#~(E).*[[:space:][:blank:]]}"
            else
                namebuf="$buf"
                attrbuf=""
            fi

            if ${isendtag} ; then
                [[ -n "${callbacks["tag_end"]}" ]] && ${callbacks["tag_end"]} "${1}" "tag_end" "$namebuf"
            else
                [[ -n "${callbacks["tag_begin"]}" ]] && ${callbacks["tag_begin"]} "${1}" "tag_begin" "$namebuf" "$attrbuf"

                # handle tags like <br/> (which are start- and end-tag in one piece)
                if ${issingletag} ; then
                    [[ -n "${callbacks["tag_end"]}" ]] && ${callbacks["tag_end"]} "${1}" "tag_end" "$namebuf"
                fi
            fi
            buf=""
        else
            # ordinary character: collect it as text content
            buf+="$c"
        fi
    done

    # Run the callback registered for "document_end" itself; the entry for
    # "document_start" may be a different command or may be empty.
    [[ -n "${callbacks["document_end"]}" ]] && ${callbacks["document_end"]} "${1}" "document_end" "exit_success"

    print # final newline to make filters like "sed" happy
}
#
# print_sample1_xml - write the built-in sample XML document to stdout,
# one element per line.
#
function print_sample1_xml
{
    printf '%s\n' \
        '<br />' \
        '<score-partwise instrument="flute1">' \
        '<identification>' \
        '<kaiman>nocrocodile</kaiman>' \
        '</identification>' \
        '<partlist>' \
        '<foo>myfootext</foo>' \
        '<bar>mybartext</bar>' \
        '<snap />' \
        '<ttt>myttttext</ttt>' \
        '</partlist>' \
        '</score-partwise>'
}
#
# usage - let getopts process a synthetic "-?" argument against the USAGE
# description, then leave the script with exit status 2.
#
# Globals: progname, USAGE (read); OPTIND, OPT (written).
#
function usage
{
    # restart option processing so that the '-?' argument below is examined
    OPTIND=0
    getopts -a "$progname" "$USAGE" OPT '-?'
    exit 2
}
# program start
# Register the following utilities with the ksh93 "builtin" command.
# NOTE(review): only basename and cat are called in this script; date and
# uname are not referenced anywhere in it.
builtin basename
builtin cat
builtin date
builtin uname
# name of this script without directory part; used as prefix for messages
# and passed to "getopts -a"
typeset progname="$(basename "${0}")"
# option/usage description handed to getopts (see the getopts calls in
# usage() and in the option loop); the text is consumed at runtime
USAGE=$'
[-?
@(#)\$Id: xmldocumenttree (Roland Mainz) 2007-08-31 \$
]
[+NAME?xmldocumenttree - XML tree demo]
[+DESCRIPTION?\bxmldocumenttree\b is a small ksh93 compound variable demo
which reads a XML input file and converts it into an internal
variable tree representation.]
[ file ]
[+SEE ALSO?\bksh93\b(1)]
'
# Option processing: USAGE declares no options which need handling here,
# so whatever getopts hands back ends up in usage().
while getopts -a "${progname}" "${USAGE}" OPT ; do
    # printmsg "## OPT=|${OPT}|, OPTARG=|${OPTARG}|"
    case "${OPT}" in
        *) usage ;;
    esac
done
shift $(( OPTIND - 1 ))

# the first remaining operand names the XML input
typeset xmlfile="$1"
[[ -n "${xmlfile}" ]] || fatal_error $"No file given."
# Root of the document tree: "nodes" receives the top-level nodes,
# "nodesnum" counts them.
xdoc=()
typeset -A xdoc.nodes=( )
integer xdoc.nodesnum=0

# Stack of variable paths used by handle_document: items[pos] names the
# node array which currently receives new nodes. The path starts with
# "doc" because handle_document reaches the tree through a nameref of
# that name.
#
# The shorter form
#     stack=()
#     typeset -a stack.items=( [0]="doc.nodes" )
#     typeset -i stack.pos=0
# doesn't work anymore with ast-ksh.2007-06-28, so the members are declared
# inside the compound variable and assigned afterwards.
stack=(
    typeset -i pos=0
    typeset -a items
)
stack.pos=0
stack.items[0]='doc.nodes'
# Callback table for xml_tok: every event is routed to handle_document.
typeset -A document_cb=(
    ["document_start"]="handle_document"
    ["document_end"]="handle_document"
    ["tag_begin"]="handle_document"
    ["tag_end"]="handle_document"
    ["tag_text"]="handle_document"
)
# Feed the document to the tokenizer on stdin. The pseudo file name
# "#sample1" selects the built-in sample document.
if [[ "${xmlfile}" = "#sample1" ]] ; then
    # relies on ksh93 running the last stage of a pipeline in the current
    # shell, so the tree built by the callbacks is still there afterwards
    print_sample1_xml | xml_tok document_cb
else
    # fail loudly instead of silently parsing an empty document
    [[ -r "${xmlfile}" ]] || fatal_error "Cannot read XML file '${xmlfile}'."
    xml_tok document_cb < "${xmlfile}"
fi
print -u2 "#parsing completed."
# Dump the tree: list all shell variables, keep the xdoc members of
# interest and drop lines which contain the literal text "]=$".
# grep -E / grep -F replace the obsolescent egrep / fgrep.
set | grep -E "xdoc.*(tagname|tagtype|tagval|tagattributes)" | grep -F -v ']=$'
print -u2 "#done."
exit 0
# EOF.
More information about the ksh93-integration-discuss
mailing list