From edc074ebd0586a7fcf46bc58d9d2a0c256422f26 Mon Sep 17 00:00:00 2001
From: Marcin Chrzanowski <mc370754@students.mimuw.edu.pl>
Date: Mon, 16 Aug 2021 10:40:10 -0400
Subject: Add and use definitions, first draft MSO query answering

---
 mgr.tex | 139 +++++++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 86 insertions(+), 53 deletions(-)

(limited to 'mgr.tex')

diff --git a/mgr.tex b/mgr.tex
index 8cec592..cfb5bd2 100644
--- a/mgr.tex
+++ b/mgr.tex
@@ -6,6 +6,8 @@
 \usepackage[backend=biber]{biblatex}
 \addbibresource{mgr.bib}
 
+\usepackage{amsmath}
+\usepackage{amsfonts}
 
 % Dane magistranta:
 \autor{Marcin Chrzanowski}{370754}
@@ -160,7 +162,7 @@ $E$.
 
 We make use of a fundamental theorem tying MSO logic on trees and tree automata:
 
-\begin{theorem}
+\begin{theorem}\label{mso-to-automaton}
     For every MSO formula $\varphi$ over trees, there exists a tree automaton
     $A$ such that for every tree $T$, $T \models \varphi$ if, and only if $T \in
     L(A)$.
@@ -291,24 +293,24 @@ The construction is as follows:
 
 Additionally, for each vertex $v$, we store the index of the next copy of $Q$ in
 which the path of this vertex's color is broken by a lower color. Put this
-information in table $BREAK$.
+information in table \breaktable.
 
 To handle the query ``does $w[i, j] \in L$'', we look at vertex $v$, which is
-the vertex of $A$'s
-initial state in the $i$th copy of $Q$ and note its color $c$. Now we want to
-answer the following question: if we follow the edges of the graph until the
-$j$th copy of $Q$, what color will we end in? First, look at $k := BREAK[v]$. If
-$k \geq j$, then we know that in the $j$th copy of $Q$, the path we're interested
-in still has color $c$. Look at the state in this copy of $Q$ that's colored
-with $c$, if it's an accepting state answer YES, if not, answer NO.
+the vertex of $A$'s initial state in the $i$th copy of $Q$ and note its color
+$c$. Now we want to answer the following question: if we follow the edges of the
+graph until the $j$th copy of $Q$, what color will we end in? First, look at $k
+:= \breaktable[v]$. If $k \geq j$, then we know that in the $j$th copy of $Q$,
+the path we're interested in still has color $c$. Look at the state in this copy
+of $Q$ that's colored with $c$, if it's an accepting state answer YES, if not,
+answer NO.
 
 If $k < j$, then jump to the $k$th copy of $Q$ and take the edge from the vertex
 colored $c$ here to the next copy of $Q$. The vertex $v'$ we end up in will be
 colored with color $c' < c$. Continue as we did before, by looking at $k' :=
-BREAK[v']$, comparing it to $j$, and either halting if $k' \geq j$, or jumping
-again otherwise. Because with each jump we move to a color strictly smaller than
-before, the number of jumps is bounded by the number of colors, $|Q|$. Thus the
-query is answered in time constant with respect to $|w|$.
+\breaktable[v']$, comparing it to $j$, and either halting if $k' \geq j$, or
+jumping again otherwise. Because with each jump we move to a color strictly
+smaller than before, the number of jumps is bounded by the number of colors,
+$|Q|$. Thus the query is answered in time constant with respect to $|w|$.
 
 \chapter{Branch Infix Regular Queries}\label{branchinfix}
 
@@ -353,7 +355,7 @@ This process will again lead to a coloring with the desired properties that
 
 Now let's consider how we could answer a query ``is the branch infix from $x$
 down to $y$ in $L$?''. Here we can't proceed exactly as in the word case. We
-don't have a $BREAK$ table since a vertex in an internal node can have
+don't have a \breaktable\ table since a vertex in an internal node can have
 arbitrarily many points below it where its color is broken by a lower one.
 Somehow we need to be able to find such a break point that is ``in the direction
 of $y$ from $x$''.
@@ -383,39 +385,43 @@ the input array.
 
 We turn to describing the index structure for our tree problem.
 
-First, we create the array $POST$ of length $n$, which is the post-order of the
-nodes.
+First, we create the array \posttable\ of length $n$, which is the post-order of
+the nodes.
 
 Next, we label each node of the tree with its pre-order number. We create the
-array $PRE$ with the corresponding pre-order labels of the nodes in $POST$, i.e.
-if $POST[i] = v$, then $PRE[i]$ is $v$'s pre-order number.
-
-Finally, for each node of the tree, we record its index in $POST$ in the array
-$INDEX$.
-
-Observation: given a node $x$ and its descendant $y$, looking at the range
-$PRE[INDEX[y], INDEX[x] - 1]$, all the values in this range are descendants of
-$x$, and the values smaller than $PRE[INDEX[y]]$ correspond to ancestors of $y$.
-In particular, the minimum of that range corresponds to the highest ancestor of
-$y$ that's a descendant of $x$.
+array \prefixtable\ with the corresponding pre-order labels of the nodes in
+\posttable, i.e.  if $\posttable[i] = v$, then $\prefixtable[i]$ is $v$'s
+pre-order number.
+
+Finally, for each node of the tree, we record its index in \posttable\ in the
+array \indextable.
+
+\begin{observation}
+    given a node $x$ and its descendant $y$, looking at the range
+    $\prefixtable[\indextable[y], \indextable[x] - 1]$, all the values in this
+    range are descendants of $x$, and the values smaller than
+    $\prefixtable[\indextable[y]]$ correspond to ancestors of $y$.  In
+    particular, the minimum of that range corresponds to the highest ancestor of
+    $y$ that's a descendant of $x$.
+\end{observation}
 
 In our problem, we only care about ancestors of $y$ that are colored black. So
 we perform one final modification of our data structure: for all non-black
-vertices $v$, we change $PRE[INDEX[v]]$ to $\infty$ (which can be represented by
-$n+1$, an integer greater than any node's pre-order label). With $PRE$ modified
-like this, we observe that now the minimum value of $PRE[INDEX[y], INDEX[x] -
-1]$ corresponds exactly to the answer of our queries -- the highest black node
-between $x$ and $y$.
+vertices $v$, we change $\prefixtable[\indextable[v]]$ to $\infty$ (which can be
+represented by $n+1$, an integer greater than any node's pre-order label). With
+\prefixtable\ modified like this, we observe that now the minimum value of
+$\prefixtable[\indextable[y], \indextable[x] - 1]$ corresponds exactly to the
+answer of our queries -- the highest black node between $x$ and $y$.
 
-We preprocess $PRE$ for RMQ queries in linear time.
+We preprocess \prefixtable\ for RMQ queries in linear time.
 
 Now when given a query $x$, $y$, we:
 
 \begin{enumerate}
-    \item Lookup $i := INDEX[y]$ and $j := INDEX[x] - 1$.
-    \item Perform an RMQ query on $PRE[i, j]$, giving us index $k$ of the
-        minimal value in that range.
-    \item Lookup the corresponding vertex as $z := POST[k]$. This is the
+    \item Lookup $i := \indextable[y]$ and $j := \indextable[x] - 1$.
+    \item Perform an RMQ query on $\prefixtable[i, j]$, giving us index $k$ of
+        the minimal value in that range.
+    \item Lookup the corresponding vertex as $z := \posttable[k]$. This is the
         answer to our query.
 \end{enumerate}
 
@@ -426,7 +432,7 @@ infix regular query problem. Recall that we have replaced the input tree $T$'s
 vertices with copies of $Q$, then connected and colored them in the same way as
 in the word case. We additionally remember in each vertex a unique identifier of
 the connected component of vertices of the same color that it's in (call this
-data $COMPONENT[v]$).
+data $\componenttable[v]$).
 
 For each color $c$, we will preprocess the above graph for queries that answer
 the question ``if we start in a vertex $v$ colored with $c$, which is the first
@@ -439,22 +445,47 @@ Now query answering can proceed similar to the word case. Given a query ``does
 the word on vertices from $x$ down to $y$ belong to $L$?'' find the copy of $Q$
 corresponding to $x$ and consider the color $c$ corresponding to the initial
 state here. If in the copy of $Q$ corresponding to $y$, the vertex of color $c$
-has the same $COMPONENT$ information as the initial state we were considering,
-we can immediately answer whether or not $A$ will accept the word on this path
-based on whether this state is accepting.
+has the same \componenttable\ information as the initial state we were
+considering, we can immediately answer whether or not $A$ will accept the word
+on this path based on whether this state is accepting.
 
 Otherwise, we need to jump down to a lower copy of $Q$. We do this by issuing a
 highest marked descendant on path query on the marked tree we created for color
 $c$. The answer to this query exactly corresponds to the point where the path of
 color $c$ from our initial state merges into a lower color $c'$ on the path
-towards $y$. From here we continue as at the beginning, checking $COMPONENT$ to
-see if we can answer immediately, or taking another jump down. Handling each
-jump takes constant time, and the number of jumps is bounded $|Q|$ since we can
-only jump to a lower color. Thus we can answer branch infix regular queries in
-constant time.
+towards $y$. From here we continue as at the beginning, checking
+\componenttable\ to see if we can answer immediately, or taking another jump
+down. Handling each jump takes constant time, and the number of jumps is bounded
+$|Q|$ since we can only jump to a lower color. Thus we can answer branch infix
+regular queries in constant time.
 
 \chapter{MSO Query Answering on Trees}
 
+We now have all the pieces necessary to present our \qptime{$n$}{$m \log m$}
+algorithm for MSO query answering on trees. We fix an MSO formula
+$\varphi(\vec{X})$ and take a tree $T$ of size $n$. We proceed with several
+reductions so that we end up with needing to solve the relabel regular queries
+problem, which we solve in the remainder of this chapter.
+
+First of all, if $T$ is not binary, we can use a standard encoding to encode it
+inside of a binary tree $T'$, modifying $\varphi$ to $\varphi'$, such that $T
+\models \varphi(\vec{W})$ if and only if $T' \models \varphi'(\vec{W})$.
+
+Now, to use Theorem \ref{mso-to-automaton}, we turn the formula into an MSO
+sentence without free variables by adding a unary relation $U_X$ for each
+variable $X \in \vec{X}$ and instead of considering the formula $\varphi'$, we
+now consider the sentence
+
+Selecting a model in which the vertices in set $W$ are exactly those colored
+with unary relation $U_X$ is equivalent to a valuation of $\vec{X}$ in which the
+variable $X$ is set to $W$.
+
+\begin{equation*}
+    \varphi'' = \exists_{X_1} \ldots \exists_{X_k}
+        (\forall{}_x x \in X_i \implies U_{X_i}(x)) \land
+        \varphi'(X_1, \ldots, X_k)
+\end{equation*}
+
 \section{Relabel Regular Queries on Trees}
 
 Fix a tree automaton $A$ and take a $\Sigma$-labeled binary tree $T$.
@@ -478,9 +509,9 @@ We also consider subtrees with holes: the \definedterm{subtree of $v$ with hole
 $w$} is the subtree of $v$ minus the subtree of $w$ (for $w$ a descendant of
 $v$).
 
-For an LCA closed set $W$ in tree $T$, we define the \definedterm{LCA partition with
-respect to $W$} to be a partition of $T$'s vertices into subtrees, subtrees with
-holes, and individual vertices created according to the following rules:
+For an LCA closed set $W$ in tree $T$, we define the \definedterm{LCA partition
+with respect to $W$} to be a partition of $T$'s vertices into subtrees, subtrees
+with holes, and individual vertices created according to the following rules:
 
 \begin{enumerate}
     \item If $v \in W$ is a maximal element with respect to the ancestor
@@ -513,14 +544,16 @@ simply the partition with respect to $W$'s LCA closure.
 The LCA closure of a set of $m$ vertices $W$ can be computed in time $O(m \log
 m)$ after linear preprocessing of the tree $T$. In the preprocessing step, in
 addition to preprocessing for LCA queries, we will assign each vertex $v$ its
-in-order number, $IN[v]$.
+in-order number, $\infixtable[v]$.
 
 Now to compute the closure of $W$, we first sort the vertices in $W$ with
 respect to their in-order numbers, so we end up with a list $v_1, \ldots, v_m$
-of vertices such that $IN[v_1] < \ldots < IN[v_m]$.
+of vertices such that $\infixtable[v_1] < \ldots < \infixtable[v_m]$.
 
-Lemma: if $IN[u] < IN[v] < IN[w]$, then $\mathlca(u, w)$ is equal to either
-$\mathlca(u, v)$ or $\mathlca(v, w)$.
+\begin{lemma}
+    if $\infixtable[u] < \infixtable[v] < \infixtable[w]$, then $\mathlca(u, w)$
+    is equal to either $\mathlca(u, v)$ or $\mathlca(v, w)$.
+\end{lemma}
 
 This may be proved by a simple case analysis.
 
-- 
cgit v1.2.3