-nolist
*============================================================================*
* Band2xml.spt - Convert any Lexware band format dictionary to XML
* 20040402
*
* A band is a hanging paragraph with a band label in column one of
* the first line of the band. Subsequent lines of the band have a
* space or tab in column 1. A main entry is indicated with a dot,
* ".", prefixed to the band label. Sub-entries have two or more
* dots prefixed to the band label. Blocks of bands may be indicated
* within entries or sub-entries by an integer prefixed to the band
* label. This is called the "mode".
*
* This program turns band labels into XML tags. For example, a band:
*
* df this is a definition
*
* will be converted to:
*
*     <df>this is a definition</df>
*
* Entire entries and sub-entries as indicated by the prefixed dots
* are surrounded by <entry> and </entry> tags. The opening
* tag is given two attributes: level and id. The level attribute
* corresponds to the number of dots prefixed to the label. The id
* attribute is the content of the dotted band. For example, a band
* format entry like:
*
* .hw aabachi
* cl Ic/3; It/3
* df to teach (a subject), instruct
* ..hw imaabachi
* df to teach (a subject) to
* ...hw imaabachitilka
* df to be taught (a subject)
* ..hw istilaabachi
* df to study
*
* Will be converted to XML as (indentation added):
*
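*   <entry level="1" id="aabachi">
*     <hw>aabachi</hw>
*     <cl>Ic/3; It/3</cl>
*     <df>to teach (a subject), instruct</df>
*     <entry level="2" id="imaabachi">
*       <hw>imaabachi</hw>
*       <df>to teach (a subject) to</df>
*       <entry level="3" id="imaabachitilka">
*         <hw>imaabachitilka</hw>
*         <df>to be taught (a subject)</df>
*       </entry>
*     </entry>
*     <entry level="2" id="istilaabachi">
*       <hw>istilaabachi</hw>
*       <df>to study</df>
*     </entry>
*   </entry>
*
* (The markup in this example was lost from the comment at some point and
* has been reconstructed; the element name "entry" should be confirmed.)
*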
* The entire file is enclosed in a single pair of root tags.
* The opening root tag has two attributes: id and date. The id
* attribute is the name of the file without the extension.
*
* Note that a significant difference between band format and XML
* is that band format allows unlimited skipping of levels. So it is
* possible to have, for example, a '...' level follow a '.' level
* skipping the '..' level. In such a case the depth is 2 but the
* sub-entry level is 3. So the program has to keep track of the depth
* of embedding independently of the sub-entry level as indicated by
* prefixed dots. The depth of embedded sub-entries is not necessarily
* the same as the sub-entry level.
*
*
* This program is designed to compile under Catspaw Spitbol. Minor
* changes here and there such as in the Input and Output association
* statements and the HOST() functions would be needed for this to
* compile under other versions.
*
* After compiling to a .exe file it can be run in two ways: 1) simply
* drag and drop the band-format file to Band2xml.exe, or 2) at a DOS
* command prompt type:
*
* band2xml filename
*
* Where "filename" is the complete name of the band-format input file.
*============================================================================*
* Run-time settings and the character constants used throughout.
* &stlimit = -1 releases the statement count limit; &trim = 1 trims
* trailing blanks from every input line.
        &stlimit = -1
        &trim = 1
* Carriage return, line feed, tab and bell.
        cr = char(13)
        lf = char(10)
        tab = char(9)
        bell = char(7)
* Announce the program on the terminal, followed by an empty line.
        terminal = 'Band2xml version 1.0 montler@unt.edu'
        terminal =
*==== DEFINED FUNCTIONS =====================================================*
* Two functions are initialized and defined here.
*
*----------------------------------------------------------------------------*
* getpar() returns the next band (hanging paragraph) read from infile as
*          one string. A line that starts with a space or tab is a
*          continuation: its leading white space is dropped and the rest
*          is joined to the pending band with a single blank. Any other
*          line starts a new band, so the pending band is returned and the
*          new line becomes the pending band. Because of this one-line
*          look-ahead the very first call returns the null string.
*          At end of input the pending band is returned and eofflag is
*          set; the call after that fails.
*          State kept between calls: rec1 (pending band), eofflag.
*----------------------------------------------------------------------------*
        define('getpar()')
        recpat = pos(0) span(' ' tab) rem . rec
                                                         :(eogetpar)
getpar  differ(eofflag)                                  :s(freturn)
gp_read rec = infile                                     :f(gp_eof)
        rec recpat                                       :s(gp_join)
* Not a continuation line: hand back what has been collected so far.
        getpar = rec1
        rec1 = rec                                       :(return)
* Continuation line: rec now holds the text after the indentation.
gp_join rec1 = rec1 ' ' rec                              :(gp_read)
* End of input: flush the pending band and remember that we are done.
gp_eof  getpar = rec1
        eofflag = 1                                      :(return)
eogetpar
*----------------------------------------------------------------------------*
* xmlentity(string,labelflag) returns string with the five characters
*          reserved by XML replaced by the predefined entities:
*              &  ->  &amp;      <  ->  &lt;      >  ->  &gt;
*              '  ->  &apos;     "  ->  &quot;
*
*          If the second argument is non-null the first argument is a
*          band label that will be used as an XML tag name, and it is
*          first made acceptable as one. For Lexware, band labels may
*          start with any character except '.1234567890' or space or
*          tab. XML tag names must start with a letter and may not start
*          with the sequence 'xml' (in any mixture of cases). The
*          solution used here is to prefix 'Z_' to any band label that
*          violates either requirement. There may be a better way to
*          deal with this.
*
*          '&' is replaced first by a char(1) placeholder so that the
*          ampersands introduced by the other entities are not escaped
*          a second time; the placeholder becomes '&amp;' at the end.
*----------------------------------------------------------------------------*
        define('xmlentity(xmlentity,xmlabel)')
        lowercase = 'abcdefghijklmnopqrstuvwxyz'
        uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                                                         :(eoxmlentity)
xmlentity
* Label only: a first character that is not a letter gets the prefix.
        xmlentity differ(xmlabel)
+                 pos(0) notany(uppercase lowercase)     :f(xmlentity0)
        xmlentity = 'Z_' xmlentity                       :(xmlentity1)
* Label only: so does a label that starts with 'xml' in any case.
xmlentity0 replace(xmlentity,uppercase,lowercase)
+                 differ(xmlabel) pos(0) 'xml'           :f(xmlentity1)
        xmlentity = 'Z_' xmlentity
* Each statement repeats until no more occurrences are found.
xmlentity1 xmlentity '&' = char(1) 'amp;'                :s(xmlentity1)
xmlentity2 xmlentity '<' = '&lt;'                        :s(xmlentity2)
xmlentity3 xmlentity '>' = '&gt;'                        :s(xmlentity3)
xmlentity4 xmlentity "'" = '&apos;'                      :s(xmlentity4)
xmlentity5 xmlentity '"' = '&quot;'                      :s(xmlentity5)
xmlentity6 xmlentity char(1) 'amp;' = '&amp;'            :s(xmlentity6)f(return)
eoxmlentity
*==== END OF DEFINED FUNCTIONS===============================================*
*=== I/O ====================================================================*
* The input file is the file dragged and dropped onto Band2xml.exe. These
* statements break up the input file name into path and file name. For
* example, if the input file is "c:\lex\language.txt" the path is "c:\lex\"
* and the filename is "language". The output will go, for this example, to
* "c:\lex\language.xml".
*----------------------------------------------------------------------------*
* Get the input file from the command line or the dragged and dropped file.
* If help is requested with /? or /H, or if there's no file name, give help.
        inputfilename = trim(host(0))
        inputfilename '/' any('hH?') | pos(0) null rpos(0) :s(help)
* Separate the infile name from the command: drop everything up to and
* including the program's ".exe" (either case) and the blanks after it.
* If no ".exe" is present the string is left as it is.
        inputfilename arb ('.exe' | '.EXE') span(' ') rem . inputfilename
* Separate the filename, path, and extension. The name is scanned from
* the right. The extension is whatever follows the last '.', but only if
* that '.' comes after the last '\'; a dot inside a directory name
* (c:\lex.d\file) is not an extension separator.
        reverse(inputfilename) pos(0) (break('.\') '.' | null)
+                 (break('\') | rem) . filename
+                 rem . path
        path = reverse(path)
        filename = reverse(filename)
* Output will go to a file with the same name and path but with
* the extension ".xml".
        outputfilename = path filename '.xml'
*--------------------------------------------------------------------------
* Check for the existence of the outputfilename. If it exists, ask if it's
* okay to overwrite. If not okay, send error message and abend.
* If it is okay, then continue.
        (input(.checkifexists,1,outputfilename) endfile(1)) :f(goinput)
        terminal = 'File ' outputfilename ' already exists. ' cr lf
+                  'Okay to overwrite it? (Yes/No)' dupl(bell,4)
* Get one character from the keyboard, then reposition the cursor.
        reply = host(8)
        terminal = dupl(cr lf,4)
* If the character typed is y or Y, ok. Else send error message.
* (89 and 121 are the character codes of 'Y' and 'y'.)
        reply '89' | '121'                               :f(erroutfilename)
goinput
* Now make the INPUT association.
* The OUTPUT association, outfile, is made later after the first good
* band is found.
        input(.infile,'infile',inputfilename)            :f(help)
* term is for output to the screen as the program is running to let the
* user know what's happening. term differs from the built-in terminal
* in that the former does not append cr lf automatically.
        output(.term,'term','con[-m0-n0]')
*============================================================================*
* Main program: find the first main entry, open the output file, then turn
* every band into an XML element and wrap (sub-)entries in entry elements.
*
* NOTE(review): the literal tag text had been lost from this section (the
* string constants were empty). The attribute names (level/id on entries,
* id/date on the root) come from the description at the top of this file;
* the element names below and the XML declaration are reconstructions and
* must be confirmed against whatever consumes the output.
        roottag = 'lexicon'
        enttag = 'entry'
        entclose = '</' enttag '>' cr lf
* openlevel<d> is the dot level of the d-th currently open entry element
* and depth is the number of open entry elements. Band format allows
* levels to be skipped, so depth cannot be computed from the dot level;
* the levels that are actually open have to be remembered.
        openlevel = table()
        depth = 0
        entcount = 0
* Initialize the band parsing pattern: optional dots (level), optional
* digits (mode), the label up to '#', blank or tab, and the rest (body).
        parseband = pos(0) (span('.') | null) . level
+                   (span('1234567890') . mode | null)
+                   (break('# ' tab) | rem) . label
+                   (span('# ' tab) | null) rem . body
* Find the first entry. If there is no first entry, send error message.
fe      band = getpar()                                  :f(fileerr01)
        band pos(0) '.' notany('.')                      :f(fe)
* It's a band-format file so now make the OUTPUT association.
        output(.outfile,'outfile',outputfilename)        :f(outputerr)
        term = lf 'Converting Lexware band format ' inputfilename cr lf
+              ' to XML in ' outputfilename cr lf lf
* Start the XML output: declaration, then the opening root tag with the
* file name (without extension) as id and the current date.
        outfile = '<?xml version="1.0"?>' cr lf
+                 '<' roottag ' id="' xmlentity(filename)
+                 '" date="' xmlentity(date()) '">' cr lf
*       Jump into the LOOP to process the first band.
                                                         :(parse)
*==== MAIN LOOP =============================================================*
* Now loop through the rest.
LOOP
        level = mode = label = body =
* Read in and parse a band. Null bands (blank lines) are skipped.
        band = getpar()                                  :f(EOF)
        ident(band)                                      :s(LOOP)
parse   band parseband
        ident(label)                                     :s(labelerr)
* Clean up the label and body of the band.
        body = xmlentity(body)
        label = xmlentity(label,1)
* If it's not a dotted band just send it out.
        eq(size(level),0)                                :s(out)
* So it is dotted. Close every open (sub-)entry whose level is greater
* than or equal to the new level: none when going deeper, one for a
* sibling, several when coming back up. Working from the remembered
* levels keeps the tags balanced even when levels have been skipped.
        closers =
closeup eq(depth,0)                                      :s(levelout)
        lt(openlevel<depth>,size(level))                 :s(levelout)
        closers = closers entclose
        depth = depth - 1                                :(closeup)
levelout
        outfile = differ(closers) closers
* Open the new (sub-)entry and remember its level.
        depth = depth + 1
        openlevel<depth> = size(level)
* An empty line separates main entries.
        outfile = eq(size(level),1) null
        outfile = '<' enttag ' level="' size(level) '" id="' body '">'
*       Count the main entries and display the count.
        entcount = eq(size(level),1) entcount + 1
        term = cr entcount
* Write the band itself as <label>body</label>, with the mode, if any,
* as an attribute.
out     mode = differ(mode) ' mode="' mode '"'
        outfile = '<' label mode '>' body '</' label '>' :(LOOP)
*==== END OF MAIN LOOP ======================================================*
*==== ERROR MESSAGES ==============================================================
filenameerr terminal = 'Input file name error.'          :(abend)
errnoinfile terminal = 'Input file not found.'           :(abend)
fileerr01 terminal = 'Input file is not in the Lexware band format. ' :(abend)
labelerr terminal = 'Empty band label.'                  :(abend)
outputerr terminal = 'Unable to create ' outputfilename ' for output.'
+                    cr lf ' File may be in use.'        :(abend)
erroutfilename terminal = 'Rename ' outputfilename ' and try again.' :(abend)
*==== END OF ERROR MESSAGES =======================================================
help    terminal = 'Drag a Lexware band format file to band2xml.exe to create '
+                  cr lf 'a new file in XML format.'     :(abend)
* Close every entry that is still open, close the root and shut up shop.
EOF     outfile = dupl(entclose,depth) '</' roottag '>'
        term = ' entries.' cr lf lf
        terminal = 'End of Band2xml.' bell
abend
        terminal = cr lf 'Press any key to exit.'
wait    host(7)                                          :f(wait)
end