--- /dev/null
+
+#include "TToken.hh"
+#include "LPushLexer.hh"
+#include "APushParser.hh"
+
+// Construct the lexer: the logger `l` and the parser `p` are forwarded to the
+// APushLexer base class, and the state machine starts in the ACCEPT state.
+LPushLexer::LPushLexer(ALogger& l, APushParser& p)
+  : APushLexer(l, p)
+{
+  state = ACCEPT;
+}
+
+// Bring the lexer back to its initial condition: the state machine returns
+// to ACCEPT and any partially collected characters are discarded.
+void
+LPushLexer::reset()
+{
+  state = ACCEPT;
+  buffer.clear();
+}
+
+// Tell the state machine that the input is over by pushing the end-of-input
+// sentinel (-1 converted to char), which push() handles in every state.
+void
+LPushLexer::flush()
+{
+  const char endOfInput = static_cast<char>(-1);
+  push(endOfInput);
+}
+
+// Turn the single character `ch` into the corresponding TToken, push that
+// token to the parser, and then move the lexer to `newState`.
+//
+// Characters with a dedicated category ('{', '}', '$', '&', end of line,
+// '^', '_', blanks, '~', '%') are mapped directly; anything else is
+// classified as LETTER, DIGIT or OTHER.
+void
+LPushLexer::transaction(char ch, State newState)
+{
+  switch (ch)
+    {
+    case '{': parser.push(TToken(TToken::BEGIN)); break;
+    case '}': parser.push(TToken(TToken::END)); break;
+    case '$': parser.push(TToken(TToken::SHIFT)); break;
+    case '&': parser.push(TToken(TToken::ALIGN)); break;
+    case '\n':
+    case '\r': parser.push(TToken(TToken::EOL, ch)); break;
+    case '^': parser.push(TToken(TToken::SUPERSCRIPT)); break;
+    case '_': parser.push(TToken(TToken::SUBSCRIPT)); break;
+    case '\t':
+    case ' ': parser.push(TToken(TToken::SPACE, ch)); break;
+    case '~': parser.push(TToken(TToken::ACTIVE, ch)); break;
+    case '%': parser.push(TToken(TToken::COMMENT)); break;
+    default:
+      {
+        // The <cctype> classifiers require a value representable as
+        // unsigned char; handing them a negative plain char (any byte
+        // above 0x7F where char is signed) is undefined behaviour.
+        const unsigned char uch = static_cast<unsigned char>(ch);
+        if (isalpha(uch)) parser.push(TToken(TToken::LETTER, ch));
+        else if (isdigit(uch)) parser.push(TToken(TToken::DIGIT, ch));
+        else parser.push(TToken(TToken::OTHER, ch));
+      }
+      break;
+    }
+  state = newState;
+}
+
+// Feed one character to the lexer state machine.
+//
+// Depending on the current state the character is buffered (MACRO and
+// LONG_IDENTIFIER names), turned into a token pushed to the parser, or used
+// only to change state. flush() marks the end of the input by pushing -1
+// converted to char. After the character has been handled, the parser's
+// cursor hint is updated to show the text still pending in the lexer.
+void
+LPushLexer::push(char ch)
+{
+  // Compare against the same converted value flush() pushes: where plain char
+  // is unsigned, the test `ch == -1` can never be true.
+  const char endOfInput = static_cast<char>(-1);
+  const bool atEnd = (ch == endOfInput);
+
+  // The <cctype> classifiers need a value representable as unsigned char;
+  // a negative plain char is undefined behaviour. The end-of-input sentinel
+  // is deliberately never classified as alpha, digit or space.
+  const unsigned char uch = static_cast<unsigned char>(ch);
+  const bool chIsAlpha = !atEnd && isalpha(uch);
+  const bool chIsDigit = !atEnd && isdigit(uch);
+  const bool chIsSpace = !atEnd && isspace(uch);
+
+  switch (state)
+    {
+    case ACCEPT:
+      if (ch == '\\') state = ESCAPE;
+      else if (ch == '#') state = PARAMETER;
+      else if (atEnd) ;
+      else if (chIsAlpha)
+        {
+          // first character of a multi-character identifier
+          buffer.push_back(ch);
+          state = LONG_IDENTIFIER;
+        }
+      else transaction(ch, ACCEPT);
+      break;
+    case ESCAPE:
+      if (chIsAlpha)
+        {
+          // first letter of a control sequence name
+          buffer.push_back(ch);
+          state = MACRO;
+        }
+      else if (atEnd) error();
+      else
+        {
+          // a backslash followed by a non-letter is a one-character control
+          parser.push(TToken(TToken::CONTROL, ch));
+          state = ACCEPT;
+        }
+      break;
+    case MACRO:
+      if (chIsAlpha)
+        buffer.push_back(ch);
+      else
+        {
+          // any non-letter (including end of input) terminates the name
+          parser.push(TToken(TToken::CONTROL, buffer));
+          buffer.erase();
+          if (ch == '\\') state = ESCAPE;
+          else if (ch == '#') state = PARAMETER;
+          else if (atEnd) state = ACCEPT;
+          else if (chIsSpace) state = IGNORE_SPACE;
+          else transaction(ch, ACCEPT);
+        }
+      break;
+    case IGNORE_SPACE:
+      if (ch == '\\') state = ESCAPE;
+      else if (ch == '#') state = PARAMETER;
+      else if (chIsSpace) ;
+      else if (atEnd) state = ACCEPT;
+      else if (chIsAlpha)
+        {
+          buffer.push_back(ch);
+          state = LONG_IDENTIFIER;
+        }
+      else transaction(ch, ACCEPT);
+      break;
+    case PARAMETER:
+      if (atEnd) error();
+      else
+        {
+          parser.push(TToken(TToken::PARAMETER, ch));
+          state = ACCEPT;
+        }
+      break;
+    case LONG_IDENTIFIER:
+      if (chIsAlpha || chIsDigit)
+        {
+          buffer.push_back(ch);
+        }
+      else
+        {
+          // any other character (including end of input) ends the identifier
+          parser.push(TToken(TToken::LETTER, buffer));
+          buffer.erase();
+          if (atEnd) state = ACCEPT;
+          else if (chIsSpace)
+            {
+              // the parser ignores spaces. But in this case, the space
+              // is significant. So, we transform this space into the MACRO
+              // \; which will not be ignored.
+              // This is not a good solution. Having a special token, that
+              // will be interpreted as "function application", is a better one.
+              buffer = ";";
+              parser.push(TToken(TToken::CONTROL, buffer));
+              buffer.erase();
+              state = IGNORE_SPACE;
+            }
+          else if (ch == '\\') state = ESCAPE;
+          // NOTE(review): unlike ACCEPT, MACRO and IGNORE_SPACE, a '#' here is
+          // not turned into the PARAMETER state; it goes through transaction()
+          // and is emitted as an OTHER token. Confirm this is intended.
+          else transaction(ch, ACCEPT);
+        }
+      break;
+    default:
+      assert(0);
+      break;
+    }
+
+  // show the text still pending in the lexer next to the cursor
+  switch (state)
+    {
+    case ESCAPE: parser.setCursorHint("\\"); break;
+    case MACRO: parser.setCursorHint("\\" + buffer); break;
+    case PARAMETER: parser.setCursorHint("#"); break;
+    case LONG_IDENTIFIER: parser.setCursorHint(buffer); break;
+    default: parser.setCursorHint(""); break;
+    }
+}
+
+// Undo the most recent input (a "backspace").
+//
+// When the lexer holds pending text (ESCAPE, MACRO, PARAMETER or
+// LONG_IDENTIFIER) the deletion is applied to that text; otherwise the
+// request is forwarded to the parser. If the parser hands back a string that
+// starts with a backslash, it is reloaded into the lexer so the user can keep
+// editing that control sequence.
+//
+// alt: in the MACRO state, erase the whole pending name instead of only its
+//      last character. It is not consulted in the other states.
+void
+LPushLexer::drop(bool alt)
+{
+  std::string restore = "";
+
+  switch (state)
+    {
+    case ACCEPT:
+    case IGNORE_SPACE:
+      restore = parser.drop();
+      if (restore.length() > 0 && restore[0] == '\\')
+        {
+          buffer = std::string(restore, 1, restore.length() - 1);
+          state = (buffer.length() > 0) ? MACRO : ESCAPE;
+        }
+      break;
+    case ESCAPE:
+      state = ACCEPT;
+      break;
+    case PARAMETER:
+      // Only the '#' has been typed and no token has been emitted for it, so
+      // forgetting it is enough. This mirrors the ESCAPE case; previously the
+      // state fell into assert(0) although it is reachable from user input.
+      state = ACCEPT;
+      break;
+    case MACRO:
+      // erase(length() - 1, 1) on an empty string would throw
+      // std::out_of_range, hence the empty() guard
+      if (alt || buffer.empty()) buffer.erase();
+      else buffer.erase(buffer.length() - 1, 1);
+      if (buffer.length() == 0) state = ESCAPE;
+      break;
+    case LONG_IDENTIFIER:
+      if (!buffer.empty()) buffer.erase(buffer.length() - 1, 1);
+      if (buffer.length() == 0) state = ACCEPT;
+      break;
+    default:
+      assert(0);
+      break;
+    }
+
+  // keep the cursor hint in step with the pending text, exactly as push() does
+  switch (state)
+    {
+    case ESCAPE: parser.setCursorHint("\\"); break;
+    case MACRO: parser.setCursorHint("\\" + buffer); break;
+    case PARAMETER: parser.setCursorHint("#"); break;
+    case LONG_IDENTIFIER: parser.setCursorHint(buffer); break;
+    default: parser.setCursorHint(""); break;
+    }
+}
+
+// Called by push() when the end of input arrives while the lexer is in the
+// ESCAPE or PARAMETER state. It performs no action and always answers false.
+bool
+LPushLexer::error() const
+{
+  const bool result = false;
+  return result;
+}
}
}
+// Decide whether a '}' typed inside the phantom group of a rightOpen MACRO is
+// legitimate.
+//
+// This method MUST be invoked when the cursor is a child of a phantom group
+// (a group without id), which in turn is the last child of a rightOpen MACRO.
+// The only way to exit from a rightOpen MACRO is opening a group before
+// inserting the MACRO and, once the MACRO is completely inserted, closing
+// the group.
+//
+// Returns true if, walking up from the MACRO, a group with id is reached
+// while crossing only phantom groups that are the last child of a rightOpen
+// MACRO; returns false otherwise.
+bool
+TPushParser::correctBrace()
+{
+  assert(cursor.parent() && cursor.parent().isG() && !cursor.parent().hasId());
+  TNode parent = cursor.parent();
+  assert(parent.parent() && parent.parent().isC());
+  assert(!frames.empty());
+  assert(frames.top().entry.rightOpen);
+  assert(parent.parent().last() == parent);
+
+  // start from the node that contains the rightOpen MACRO
+  TNode node = parent.parent().parent();
+
+  // A missing ancestor ends the walk: no enclosing group with id was found.
+  // Previously a null node was queried without being checked first.
+  while (node)
+    {
+      // at the moment we assume that a MACRO cannot be child of an element
+      // other than a group
+      if (!node.isG()) return false;
+
+      // the rightOpen MACRO is inside a group with id, so the '}' is correct
+      if (node.hasId()) return true;
+
+      // the MACRO is a phantom group's child: we have to control why we
+      // have this phantom group
+      TNode nodeParent = node.parent();
+      if (!nodeParent || !nodeParent.isC()) return false;
+
+      // we have to control the nature of the enclosing MACRO
+      const TDictionary::Entry& entry = dictionary.find(nodeParent.nameC());
+      if (!entry.rightOpen || !(node == nodeParent.last())) return false;
+
+      // the phantom group is the last child of another rightOpen MACRO:
+      // re-iterate the process one level up
+      node = nodeParent.parent();
+    }
+
+  return false;
+}
+
+// Handle a closing brace ('}') according to where the cursor currently is:
+//  - inside a group with id: the group is closed and the cursor advances;
+//  - inside a group that is a cell's child: the cursor advances past the row;
+//  - inside a phantom group that is the last child of a rightOpen MACRO: the
+//    brace is accepted only if correctBrace() confirms it;
+//  - anywhere else: the brace is ignored and a diagnostic is logged.
+void
+TPushParser::do_end()
+{
+  TNode parent = cursor.parent();
+
+  if (!(parent && parent.isG()))
+    {
+      // the cursor is not inside a group: the '}' is redundant
+      logger.warning("There is so no corresponding'{'");
+      return;
+    }
+
+  if (parent.hasId())
+    {
+      // normal closing brace for an explicitly open group
+      cursor.remove();
+      advance(parent);
+      return;
+    }
+
+  // from here on the cursor's parent is a phantom group (a group without id)
+  TNode gparent = parent.parent();
+
+  if (gparent && gparent.is("cell"))
+    {
+      // closing brace for a structure in which & or \cr have been used
+      assert(!frames.empty());
+      TNode row = gparent.parent();
+      assert(row && row.is("row"));
+      assert(row.parent());
+      advance(row);
+      return;
+    }
+
+  if (!gparent || gparent.is("math"))
+    {
+      // there is nothing this '}' could close
+      logger.warning("There is so no corresponding'{'");
+      return;
+    }
+
+  if (!(gparent.isC() && gparent.last() == parent))
+    {
+      // at the moment, a phantom group holding the cursor can only be a
+      // MACRO's child or a cell's child, and both cases are handled elsewhere
+      logger.error("do_end: strange TML tree");
+      return;
+    }
+
+  // the phantom group is the last child of a MACRO: a frame MUST be in the stack
+  assert(!frames.empty());
+
+  if (!frames.top().entry.rightOpen)
+    {
+      logger.error("closing brace ignored");
+      return;
+    }
+
+  // For a rightOpen MACRO the '}' is the way to leave the phantom group, and
+  // with it the MACRO, but only when it closes a group with id that encloses
+  // the MACRO (possibly through other rightOpen MACROs). That recursive
+  // control is done by correctBrace().
+  if (correctBrace())
+    {
+      cursor.remove();
+      advance(parent);
+    }
+  else logger.warning("nothing to close");
+}
+
+
+/*
void
TPushParser::do_end()
{
//assert(0);
}
}
+*/
void
TPushParser::do_shift()
}
else
{
- // In TeX, the user can type
- // $a-b\over2a+b$
- // In this case, '$' character is correct.
- // I don't think so, if user types
- // $12+{a-b\over2a+b$
- // because the group is not closed with a '}'
logger.error("parser: math shift");
}
}
void
TPushParser::do_space(const std::string&)
{
- // ? may be used to distinguish tokens in some mode?
+ logger.debug("do_space");
}
void
// the control below is designed to handle the case in which val have more than one unicode character
DOM::UCS4String ucs4val(prev.element().getAttribute("val"));
- if ((ucs4val.length() <= 1) || prev.element().hasAttribute("name"))
+ if ((prev.is("i")) || (ucs4val.length() <= 1) || prev.element().hasAttribute("name"))
{
cursor.remove();
prev.replace(cursor);
void
TPushParser::gdelete_prev_script()
{
- // this method delete an sp or an sb preceding the cursor
+ // this method deletes an sp or an sb preceding the cursor
assert(cursor.prev());
assert(cursor.parent());
TNode prev = cursor.prev();
assert(prev.is("sp") || prev.is("sb"));
cursor.remove();
prev.append(cursor);
- // i can invoke the gdelet_prev, because a sp (sb) MUST have two children
+ // we can invoke the gdelete_prev, because a sp (sb) MUST have two children
gdelete_prev();
}
cursor.remove();
prev.append(cursor);
- // a group may have no children, so the gdelete_prev is not appropriate
- // so this method is not equivalent to the one above
+ // a group could have no children, so the gdelete_prev is not appropriate
+ // so, this method is not equivalent to the one above
do_gdelete();
}
if (!entry.defined())
{
- // We can assume tha the user want to completely delete the undefined macro
+ // We can assume that the user wants to completely delete the undefined macro
cursor.remove();
prev.replace(cursor);
}
cursor.remove();
prev.last().append(cursor);
- // the gdelete_prev is not appropriate, because the last child of the MACRO may have no children
+ // the gdelete_prev is not appropriate, because the last child of the MACRO could have no children
do_gdelete_phantom_group();
}
else if (entry.leftOpen)
// In this case, we do not have to push a frame in the stack, because we remove the
// MACRO immediately, substituting it with the content of the phantom group.
// At the moment, we don't remove the last child of the phantom group, but
- // i don't know if it's the correct behavior of the graphical deleting.
- // To delete it, just remove the comment of the last instruction of this block
- // of code.
+ // it's not clear if it's the correct behavior of the graphical deleting.
+ // To delete it, just remove the comment of the last instruction of the
+ // if (g.size()) block.
assert(prev.first());
assert(prev.first().isG());
assert(prev.first() == prev.last());
TNode g = prev.first();
- g.remove();
- prev.replace(g.first(), TNode());
- //do_gdelete();
+ if (g.size())
+ {
+ // in this case, the phantom group has at least one child, so we can call the
+ // TNode::replace.
+ g.remove();
+ prev.replace(g.first(), TNode());
+ //gdelete_prev();
+ }
+ else
+ {
+ // otherwise, the phantom group has no children, so we remove it, also the MACRO.
+ cursor.remove();
+ g.remove();
+ prev.replace(cursor);
+ }
}
else if (!entry.pattern.empty())
{
// we have to start to remove a MACRO which accepts arguments.
- // If the MACRO accepts arguments, the MACRO has one or more children
+ // If the MACRO accepts arguments, the MACRO has at least one child
assert(prev.size() >= 1);
// Differnt actions must be taken, based on the nature of the last child
- // of the MACRO. We have to distinguish the case in which it's a delimited argument
+ // of the MACRO. We have to distinguish the case in which it's a delimited argument,
// frome the one in which it's a not delimited argument.
if (prev.last().isG() && !prev.last().hasId())
{
} // end of if (!entry.pattern.empty())
else
{
- // if we are here, the MACRO preceding the cursor, is !(rightOpen ||leftOpen),
+ // if we are here, the MACRO preceding the cursor, is !(rightOpen || leftOpen),
// and has no pattern. It means that it has no children.
// We can replace it with the cursor
assert(prev.size() == 0);
{
// in this case we have removed a MACRO's child.
// So, we have to update the member pos of the frame in the stack
- // I can assert that this MACRO accepts arguments.
+ // We can assert that this MACRO accepts arguments.
assert(!frames.empty());
Frame& frame = frames.top();
assert(frame.pos > 0);
TPushParser::do_gdelete_script()
{
// If we are here, the cursor is child of a script (sp or sb) and
- // this means that a prev MUST exist and that there is one and only one
+ // this means that a prev does exist and that there is one and only one
// element preceding the cursor. The sp's (or sb's) parent
// MUST NOT be a MACRO.
// The element preceding the cursor is the base of the script.
parent.replace(cursor);
// if the new parent is a group with Id and the cursor is the only
- // element of this group, we have to remove the group. This controls are made
+ // element of this group, we have to remove the group. These controls are made
// in the method rgreplace_father().
if (cursor.parent().isG() && cursor.parent().hasId()) rgreplace_father();
}
assert(cursor.parent().size() > 1);
}
-} // end of method do_gdelet_script
+} // end of method do_gdelete_script
void
TPushParser::do_gdelete_macro()
// If the phantom group is an sp's child, it means that the user has removed all \' in the
// phantom group.
// We can remove the phamtom group and the sp element. But we also can only remove the
- // phantom group, giving to the user the possibility of inserting an exponent.
+ // phantom group, giving the user the possibility of inserting an exponent.
// At the moment, we remove the sp element (and the phantom group), because it may be the correct
// behavior of a graphical deleting.
cursor.remove();
parent.replace(cursor);
- // now we have an sp element with two children: the first child (we don't know nothing about it)
+ // now we have an sp element with two children: the first child (we don't know anything about it)
// and the cursror.
assert(cursor.parent().size() == 2);
// to delete the script we can invoke the do_gdelete_script(), which will do all controls we need.
- // To give the possibility of insering an exponent
+ // To give the possibility of inserting an exponent, just remove the following instruction.
do_gdelete_script();
}
else if (parent.parent().isSp())
{
// in this case, the cursor'parent is in the second and last child
// of the MACRO. We can assert that the grand father has two
- // children, which are both phantom group
+ // children, which are both phantom groups
assert(gfather.size() == 2);
assert((gfather.last() == parent) && (gfather.first().isG() && !gfather.first().hasId()));
assert(frame.pos == 0);
assert(ggfather);
cursor.remove();
parent.remove();
- // i have to replace the gfather with the elements of its first child
- gfather.replace(gfather.first().first(), TNode());
- ggfather.append(cursor);
+ // we have to replace the gfather with the elements of its first child, but this group may have no
+ // children.
+ if (gfather.first().size())
+ {
+ gfather.replace(gfather.first().first(), TNode());
+ ggfather.append(cursor);
+ }
+ else
+ {
+ // in this case, the MACRO has to be replaced with the cursor
+ gfather.first().remove();
+ gfather.replace(cursor);
+ }
// now we have the situation preceding the insertion of the leftOpen and rightOpen MACRO.
// this MACRO no longer exists.
frames.pop();
// we can delete the phantom group;
// we can delete the superscript.
// At the moment we implement the first solution. To implement the second one, just remove
- // delete the line code after the logger.warning and remove comment from the remaining lines
+ // the line code after the logger.warning and remove comments from the remaining lines
logger.warning("parser: TML tree in a strange state, we try to recover from it");
parent.replace(cursor);
}
else
{
- // cursor' grand father is undefined
+ // cursor's grand father is undefined
logger.error("parser: TML tree is in a unknown state");
}
} // end of the else of the if (prev)
{
// we ca do two thing...we can remove the math mode (it implies controlling the display attribute), we can do nothing
// At the moment, the user's will of deleting is simply ignored
- logger.error("TML tree not well structured");
+ logger.warning("nothing to delete");
}
else if (parent.isG())
{
if (frame.entry.pattern[frame.pos + 1] == token)
{
// The token matches with a delimiter of the argument,
- // so we increment the frame.pos
+ // hence we increment the frame.pos
frame.pos++;
if (frame.entry.lastDelimiter(frame.pos))
if (frame.entry.pattern[frame.pos].category != TToken::PARAMETER)
{
// in this case, there is a sequence of delimiters that delimitates
- // the argument, and the user correctly inserted a portion of this
+ // the argument, and the user has correctly inserted a portion of this
// sequence, but now has inserted a wrong delimiter.
// Here, there are some possibilities:
// - ignore the token, and wait for the correct delimiter
return "";
}
+
+// Move the cursor to the position that follows `node`.
+//
+//  - `node` has no parent: an error is logged and nothing is moved;
+//  - the parent is a group: the cursor is placed before node's next sibling,
+//    or appended to the group when there is no next sibling;
+//  - the parent is a MACRO: the frame on top of the stack decides whether the
+//    MACRO is complete (the frame is popped and the search goes on further
+//    up) or whether the cursor has to be placed for the next argument;
+//  - any other parent: the search continues from the parent.
+void
+TPushParser::advance(const TNode& node)
+{
+  assert(node);
+
+  TNode parent = node.parent();
+
+  if (!parent)
+    {
+      // this is an error
+      logger.error("wrong TML tree");
+      return;
+    }
+
+  if (parent.isG())
+    {
+      TNode next = node.next();
+      if (next) next.insert(cursor);
+      else parent.append(cursor);
+      return;
+    }
+
+  if (!parent.isC())
+    {
+      advance(parent);
+      return;
+    }
+
+  // the parent is a MACRO: its frame MUST be on top of the stack
+  assert(!frames.empty());
+
+  if (frames.top().pos == frames.top().entry.pattern.size())
+    {
+      // the whole pattern has been consumed, so the frame is removed from the
+      // stack; the flag is read first because it belongs to that frame
+      const bool rightOpen = frames.top().entry.rightOpen;
+      frames.pop();
+      if (rightOpen) advance(parent.parent());
+      else advance(parent);
+    }
+  else if (frames.top().entry.paramDelimited(frames.top().pos))
+    {
+      // the next argument is delimited, so we have to create a phantom group
+      TNode g = doc.createG();
+      g.append(cursor);
+      // the parent is looked up again here, after the cursor has been moved
+      node.parent().append(g);
+    }
+  else
+    {
+      // the next argument is not delimited, so we have to append the cursor
+      // to the MACRO
+      parent.append(cursor);
+    }
+}
+
+
+/*
+ * This version handles the case in which we have to
+ * create a delimited argument
+
+void
+TPushParser::advance(const TNode& node)
+{
+ assert(node);
+ TNode parent = node.parent();
+ if (!parent)
+ ; // nothing to do, the cursor is not in the document any more
+ else if (parent.isG())
+ {
+ TNode next = node.next();
+ if (next) next.insert(cursor);
+ else parent.append(cursor);
+ }
+ else if (parent.isC())
+ {
+ if (node.next())
+ ; // cursor removed
+ else
+ {
+ Frame& frame = frames.top();
+ if (frame.pos == frame.entry.pattern.size())
+ {
+ frames.pop();
+ advance(parent);
+ }
+ else if (frame.entry.paramDelimited(frame.pos))
+ {
+ // the next argument is delimited, so we have to create a phantom group
+ // with the cursor inside. We have to remember that, since we are here,
+ // the cursor has been removed
+ TNode g = doc.createG();
+ g.append(cursor);
+ parent.append(g);
+ }
+ else
+ {
+ // otherwise, the next MACRO's argument is not delimited, so we just
+ // append the cursor to the MACRO
+ parent.append(cursor);
+ }
+ }
+ }
+ else if (parent.is("math"))
+ ; // we are done
+ else
+ advance(parent);
+}
+*/
+
+/*
+ * original advance
void
TPushParser::advance(const TNode& node)
{
else
advance(parent);
}
+*/
+
void
TPushParser::setCursorHint(const std::string& c)