[ksh93-integration-discuss] Compound variable output suddenly has more '.'s ... / was: Re: 2007-09-22 beta source tarballs
Roland Mainz
roland.mainz at nrubsig.org
Fri Sep 21 18:06:13 PDT 2007
Glenn Fowler wrote:
>
> beta 2007-09-22 source tarballs have been posted at
>
> http://www.research.att.com/sw/download/beta/
Thanks! :-)
> the outstanding bug reports for ksh93s+ have been addressed
> barring dumb mistakes this should be the last ksh93s+ update
Erm, I found a weird issue which may affect compound variable
serialisation/deserialisation:
Running the attached demo script ("xmldocumenttree1.ksh") as...
$ ksh xmldocumenttree1.ksh "#sample1" tree #
... results in a compound variable value where some members suddenly
have a '.' in front of the names, e.g.
-- snip --
#parsing completed.
(
typeset -A nodes=(
[0]=(
typeset -l -i nodesnum=0
tagname=br
tagtype=element
)
[1]=(
tagtype=text
tagvalue=$'
'
)
[2]=(
typeset -A .nodes=(
[0]=(
-- snip --
Note the ".nodes" for the array entry "2" (this isn't the only instance
where this happens but the output is a little bit large... I uploaded the
full output of ast-ksh.2007-05-15 to
http://opensolaris.pastebin.ca/raw/706573 , the output of
ast-ksh.2007-09-22 as http://opensolaris.pastebin.ca/raw/706574 and a
unified diff as http://opensolaris.pastebin.ca/706575).
)
Earlier versions (tested with ast-ksh.2007-05-15 and
ast-ksh.2007-06-28) only used "nodes" as the name... I guess this is a bug,
right?
----
Bye,
Roland
--
__ . . __
(o.\ \/ /.o) roland.mainz at nrubsig.org
\__\/\/__/ MPEG specialist, C&&JAVA&&Sun&&Unix programmer
/O /==\ O\ TEL +49 641 7950090
(;O/ \/ \O;)
-------------- next part --------------
#!/bin/ksh93
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
#
# Solaris: the tools in /usr/bin are not POSIX-conformant, so the
# versions in /usr/xpg4/bin/ have to come first in the search path
PATH=/usr/xpg4/bin:/bin:/usr/bin
export PATH
#
# fatal_error - print an error message on stderr and terminate the script
#
# Arguments: $@ - message words; they are joined into one string using
#                 the first character of IFS (a space by default)
# Globals:   progname (read) - used as prefix of the message
# Exits:     always, with status 1
#
function fatal_error
{
    # "$*" keeps prefix and message in a single word; "-r" and "--" stop
    # "print" from interpreting backslash sequences or a leading "-" in
    # the message, which may contain user-supplied text
    print -r -u 2 -- "${progname}: $*"
    exit 1
}
#
# attrstrtoattrarray - split the attribute string of a tag into an array
#
# Arguments:
#   $1 - attribute string, e.g.:  foo="bar" baz='x y' flag
#   $2 - name of the array variable which receives the result; each
#        attribute becomes one compound entry ( name=... value=... ),
#        attributes without "=" become ( name=... ) only
#
# Surrounding '' or "" quotes are removed from the stored value.
# Calls fatal_error once more than 1000 entries have been stored.
#
function attrstrtoattrarray
{
#set -o xtrace
    typeset s="$1"
    nameref aa=$2 # attribute array
    integer aa_count=0
    typeset nextattr
    integer currattrlen=0
    typeset tagstr
    typeset tagval

    while (( ${#s} > 0 )) ; do
        # skip whitespaces; currattrlen still holds the length of the
        # attribute handled by the previous iteration, so the scan starts
        # right behind it and the assignment below drops both
        while [[ "${s:currattrlen:1}" = ~(E)[[:blank:][:space:]] ]] ; do
            (( currattrlen++ ))
        done
        s="${s:currattrlen:${#s}}"

        # anything left ?
        (( ${#s} == 0 )) && break

        # NOTE(review): "_-:" inside the bracket expressions below reads as a
        # descending range; presumably the literal characters "_", "-" and ":"
        # were intended - confirm how the regex engine treats it before changing

        # Pattern tests:
        #x="foo=bar huz=123" ; print "${x##~(E)[[:alnum:]_-:]*=[^[:blank:]\"]*}"
        #x='foo="ba=r o" huz=123' ; print "${x##~(E)[[:alnum:]_-:]*=\"[^\"]*\"}"
        #x="foo='ba=r o' huz=123" ; print "${x##~(E)[[:alnum:]_-:]*=\'[^\']*\'}"
        #x="foox huz=123" ; print "${x##~(E)[[:alnum:]_-:]*}"
        # All pattern combined via eregex (w|x|y|z):
        #x='foo="bar=o" huz=123' ; print "${x##~(E)([[:alnum:]_-:]*=[^[:blank:]\"]*|[[:alnum:]_-:]*=\"[^\"]*\"|[[:alnum:]_-:]*=\'[^\']*\')}"

        # a single-quoted value has to end at the next single quote
        # ([^\']*), otherwise the longest match runs on to the last quote
        # in the string and merges several attributes into one
        nextattr="${s##~(E)([[:alnum:]_-:]*=[^[:blank:]\"]*|[[:alnum:]_-:]*=\"[^\"]*\"|[[:alnum:]_-:]*=\'[^\']*\'|[[:alnum:]_-:]*)}"
        currattrlen=$(( ${#s} - ${#nextattr}))

        # add entry
        tagstr="${s:0:currattrlen}"
        if [[ "${tagstr}" = *=* ]] ; then
            # normal case: attribute with value

            tagval="${tagstr#*=}"

            # strip quotes ('' or "")
            if [[ "${tagval}" = ~(Elr)(\'.*\'|\".*\") ]] ; then
                tagval="${tagval:1:${#tagval}-2}"
            fi

            aa[${aa_count}]=( name="${tagstr%%=*}" value="${tagval}" )
        else
            # special case for HTML where you have something like <foo baz>
            aa[${aa_count}]=( name="${tagstr}" )
        fi
        (( aa_count++ ))

        # also ends the loop when the pattern matches nothing and "s"
        # therefore never gets shorter
        (( aa_count > 1000 )) && fatal_error "$0: aa_count too large" # assert
    done
}
#
# handle_document - callback for xml_tok which builds the document tree
#
# Arguments:
#   $1 - name of the callback table (associative array); its "arg_tree"
#        entry holds the name of the tree variable
#   $2 - event: tag_begin, tag_end, tag_text, tag_comment,
#        document_start or document_end
#   $3 - tag name (tag_begin/tag_end) or text (tag_text/tag_comment)
#   $4 - attribute string of the tag (used for tag_begin only)
#
# Globals:
#   stack - stack.items[stack.pos] is the variable path of the node list
#           which is currently being filled; the same path with "num"
#           appended names the counter of entries in that list
#
function handle_document
{
#set -o xtrace
nameref callbacks=${1}
typeset tag_type="${2}"
typeset tag_value="${3}"
typeset tag_attributes="${4}"
# "doc" refers to the variable named by callbacks["arg_tree"]
# NOTE(review): presumably the "doc.nodes..." paths stored in stack.items
# are resolved through this nameref - confirm
nameref doc=${callbacks["arg_tree"]}
# current node list and its entry counter (bound before the stack is
# modified below)
nameref nodepath="${stack.items[stack.pos]}"
nameref nodesnum="${stack.items[stack.pos]}num"
case "${tag_type}" in
tag_begin)
# append a new element node with an empty child list of its own
nodepath[${nodesnum}]+=(
typeset tagtype="element"
typeset tagname="${tag_value}"
typeset -A tagattributes=( )
typeset -A nodes=( )
integer nodesnum=0
)
# fill attributes
if [[ "${tag_attributes}" != "" ]] ; then
# the following doesn't work yet (this is bug, dgk has a fix for it)
if false ; then
attrstrtoattrarray "${tag_attributes}" "nodepath[${nodesnum}].tagattributes"
else
# workaround: pass the name of the variable behind the nameref
attrstrtoattrarray "${tag_attributes}" "${!nodepath}[${nodesnum}].tagattributes"
fi
fi
# push the child list of the new element; following events go there
# until the matching tag_end
(( stack.pos++ ))
stack.items[stack.pos]="${stack.items[stack.pos-1]}[${nodesnum}].nodes"
# nodesnum is still the counter of the list the element was added to
(( nodesnum++ ))
;;
tag_end)
# pop: continue with the list which contains the closed element
(( stack.pos-- ))
;;
tag_text)
# append a text node
nodepath[${nodesnum}]+=(
typeset tagtype="text"
typeset tagvalue="${tag_value}"
)
(( nodesnum++ ))
;;
tag_comment)
# append a comment node
nodepath[${nodesnum}]+=(
typeset tagtype="comment"
typeset tagvalue="${tag_value}"
)
(( nodesnum++ ))
;;
document_start)
# nothing to do
;;
document_end)
# nothing to do
;;
esac
# print "xmltok: '${tag_type}' = '${tag_value}'"
}
#
# xml_tok - small XML/HTML tokenizer
#
# Reads the document from stdin and reports what it finds through the
# callback table (associative array) whose name is passed as $1.
# Entries which are empty or missing are skipped. Each callback is
# invoked as
#     <callback> <table name> <event> [<value> [<attribute string>]]
# for the following events:
#     document_start  before the first character is read
#     tag_text        value = text collected in front of a "<"
#     tag_comment     value = comment text without "!--" and "--"
#     tag_begin       value = tag name, plus the attribute string
#     tag_end         value = tag name (also sent for "<foo />")
#     document_end    after the input is exhausted, value "exit_success"
#
# A newline is written to stdout before the function returns.
#
function xml_tok
{
    typeset buf=""
    typeset namebuf=""
    typeset attrbuf=""
    typeset c=""
    typeset isendtag # bool: true/false
    typeset issingletag # bool: true/false (used for tags like "<br />")
    nameref callbacks=${1}

    [[ ! -z "${callbacks["document_start"]}" ]] && ${callbacks["document_start"]} "${1}" "document_start"

    while IFS='' read -r -N 1 c ; do
        isendtag=false

        if [[ "$c" = "<" ]] ; then
            # flush any text content
            if [[ "$buf" != "" ]] ; then
                [[ ! -z "${callbacks["tag_text"]}" ]] && ${callbacks["tag_text"]} "${1}" "tag_text" "$buf"
                buf=""
            fi

            # the character behind "<" tells whether this is an end tag
            IFS='' read -r -N 1 c
            if [[ "$c" = "/" ]] ; then
                isendtag=true
            else
                buf="$c"
            fi
            # read the rest of the tag up to (excluding) the next ">"
            IFS='' read -r -d '>' c
            buf+="$c"

            # handle comments
            if [[ "$buf" = ~(El)!-- ]] ; then
                # did we read the comment completely ?
                if [[ "$buf" != ~(Elr)!--.*-- ]] ; then
                    # the ">" seen above was part of the comment text;
                    # put it back and read on until the text ends in "--"
                    buf+=">"
                    while [[ "$buf" != ~(Elr)!--.*-- ]] ; do
                        IFS='' read -r -N 1 c || break
                        buf+="$c"
                    done
                fi

                # offset 3 skips "!--", the length omits the trailing "--"
                [[ ! -z "${callbacks["tag_comment"]}" ]] && ${callbacks["tag_comment"]} "${1}" "tag_comment" "${buf:3:${#buf}-5}"
                buf=""
                continue
            fi

            # check if the tag starts and ends at the same time (like "<br />")
            if [[ "${buf}" = ~(Er).*/ ]] ; then
                issingletag=true
                buf="${buf%*/}"
            else
                issingletag=false
            fi

            # check if the tag has attributes (e.g. space after name)
            if [[ "$buf" = ~(E)[[:space:][:blank:]] ]] ; then
                # split at the whitespace: name in front, attributes behind
                namebuf="${buf%%~(E)[[:space:][:blank:]].*}"
                attrbuf="${buf#~(E).*[[:space:][:blank:]]}"
            else
                namebuf="$buf"
                attrbuf=""
            fi

            if ${isendtag} ; then
                [[ ! -z "${callbacks["tag_end"]}" ]] && ${callbacks["tag_end"]} "${1}" "tag_end" "$namebuf"
            else
                [[ ! -z "${callbacks["tag_begin"]}" ]] && ${callbacks["tag_begin"]} "${1}" "tag_begin" "$namebuf" "$attrbuf"

                # handle tags like <br/> (which are start- and end-tag in one piece)
                if ${issingletag} ; then
                    [[ ! -z "${callbacks["tag_end"]}" ]] && ${callbacks["tag_end"]} "${1}" "tag_end" "$namebuf"
                fi
            fi
            buf=""
        else
            buf+="$c"
        fi
    done

    # invoke the "document_end" entry which was just tested (the
    # "document_start" entry must not be called here: a table with
    # different handlers for both events would get the wrong one)
    [[ ! -z "${callbacks["document_end"]}" ]] && ${callbacks["document_end"]} "${1}" "document_end" "exit_success"

    print # final newline to make filters like "sed" happy
}
#
# print_sample1_xml - write the built-in sample document (selected with
# the file name "#sample1") to stdout
#
function print_sample1_xml
{
    # the text contains nothing to expand, so it is emitted verbatim
    cat <<'SAMPLE1_XML'
<br />
<score-partwise instrument="flute1">
<identification>
<kaiman>nocrocodile</kaiman>
</identification>
<!-- a comment -->
<partlist>
<foo>myfootext</foo>
<bar>mybartext</bar>
<snap />
<!-- another
comment -->
<ttt>myttttext</ttt>
</partlist>
</score-partwise>
SAMPLE1_XML
}
#
# usage - let getopts process the pseudo argument '-?' against ${USAGE}
# (in ksh93 this produces the usage message), then exit with status 2
#
# Globals: progname (read), USAGE (read), OPTIND and OPT (written)
#
function usage
{
    # restart option processing from the beginning
    OPTIND=0

    getopts -a "${progname}" "${USAGE}" OPT '-?'

    exit 2
}
# program start

# use the shell's builtin versions of these utilities
builtin basename
builtin cat
builtin date
builtin uname

# name used as prefix of messages and by getopts
typeset progname
progname="$(basename "${0}")"

# option description for getopts
USAGE=$'
[-?\n@(#)\$Id: xmldocumenttree1 (Roland Mainz) 2007-09-15 \$\n]
[-author?Roland Mainz <roland.mainz at nrubsig.org]
[+NAME?xmldocumenttree1 - XML tree demo]
[+DESCRIPTION?\bxmldocumenttree\b is a small ksh93 compound variable demo
which reads a XML input file, converts it into an internal
variable tree representation and outputs it in the format
specified by viewmode (either "list", "namelist" or "tree").]
file viewmode
[+SEE ALSO?\bksh93\b(1)]
'

# no options are defined: whatever getopts hands back ends in usage()
while getopts -a "${progname}" "${USAGE}" OPT ; do
    # printmsg "## OPT=|${OPT}|, OPTARG=|${OPTARG}|"
    case "${OPT}" in
        *) usage ;;
    esac
done
shift $(( OPTIND - 1 ))

# operands: <file> <viewmode>
typeset xmlfile="$1"
typeset viewmode="$2"

[[ -z "${xmlfile}" ]] && fatal_error $"No file given."

[[ "${viewmode}" = ~(Elr)(list|namelist|tree) ]] || fatal_error $"Invalid view mode \"${viewmode}\"."
# the document tree: "nodes" receives the top-level nodes, "nodesnum"
# counts them
xdoc=()
typeset -A xdoc.nodes=( )
integer xdoc.nodesnum=0

# stack of variable paths used by handle_document; entry 0 is the
# top-level node list
# (no ";" after "then": an empty command is not valid shell grammar)
if false ; then
    #this doesn't work anymore with ast-ksh.2007-06-28
    stack=()
    typeset -a stack.items=( [0]="doc.nodes" )
    typeset -i stack.pos=0
else
    stack=(
        typeset -i pos=0
        typeset -a items
    )
    stack.pos=0
    stack.items[0]='doc.nodes'
fi
# setup callbacks for xml_tok
typeset -A document_cb # callbacks for xml_tok
document_cb["document_start"]="handle_document"
document_cb["document_end"]="handle_document"
document_cb["tag_begin"]="handle_document"
document_cb["tag_end"]="handle_document"
document_cb["tag_text"]="handle_document"
document_cb["tag_comment"]="handle_document"
# argument for "handle_document"
document_cb["arg_tree"]="xdoc"

if [[ "${xmlfile}" = "#sample1" ]] ; then
    # the tree is filled by the last element of the pipeline; this
    # relies on ksh93 running that element in the current shell
    print_sample1_xml | xml_tok document_cb
else
    # stop here instead of printing an empty tree with exit status 0
    [[ -r "${xmlfile}" ]] || fatal_error $"Cannot read file \"${xmlfile}\"."

    # plain redirection, no extra "cat" process needed
    xml_tok document_cb <"${xmlfile}"
fi
print -u2 "#parsing completed."

# print the tree in the requested format; "grep -E"/"grep -F" replace
# the obsolescent "egrep"/"fgrep" commands
case "${viewmode}" in
    list)
        # all variables of the tree with their values, without the
        # lines ending in "]=" (entries with no value)
        set | grep -E "xdoc.*(tagname|tagtype|tagval|tagattributes)" | grep -F -v ']=$'
        ;;
    namelist)
        # variable names only
        typeset + | grep -E "xdoc.*(tagname|tagtype|tagval|tagattributes)"
        ;;
    tree)
        # -r: the serialised tree must not pass through the backslash
        # escape processing of "print"
        print -r -- "${xdoc}"
        ;;
    *)
        # not reachable as long as viewmode is validated at startup
        fatal_error $"Invalid view mode \"${viewmode}\"."
        ;;
esac

print -u2 "#done."

exit 0
# EOF.
More information about the ksh93-integration-discuss
mailing list