From 83e5ff1a011783be60d8725f1a340369311d9ec2 Mon Sep 17 00:00:00 2001 From: Marcin Chrzanowski Date: Tue, 3 Aug 2021 18:08:54 +0200 Subject: Initial commit --- mgr.tex | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 mgr.tex (limited to 'mgr.tex') diff --git a/mgr.tex b/mgr.tex new file mode 100644 index 0000000..86217d9 --- /dev/null +++ b/mgr.tex @@ -0,0 +1,175 @@ +% dodaj opcję [licencjacka] dla pracy licencjackiej +% dodaj opcję [en] dla wersji angielskiej (mogą być obie: [licencjacka,en]) +\documentclass[en]{pracamgr} + +% Dane magistranta: +\autor{Marcin Chrzanowski}{370754} + +\title{MSO Query Answering on Trees} +\titlepl{Odpowiadanie na zapytania MSO na drzewach} +\kierunek{Computer Science} + +% Praca wykonana pod kierunkiem: +% (podać tytuł/stopień imię i nazwisko opiekuna +% Instytut +% ew. Wydział ew. Uczelnia (jeżeli nie MIM UW)) +\opiekun{dr hab. Szymon Toruńczyk\\ + Institute of Informatics\\ + } + +% miesiąc i~rok: +\date{August 2021} + +%Podać dziedzinę wg klasyfikacji Socrates-Erasmus: +\dziedzina{ +%11.0 Matematyka, Informatyka:\\ +%11.1 Matematyka\\ +% 11.2 Statystyka\\ +11.3 Informatyka\\ +%11.4 Sztuczna inteligencja\\ +%11.5 Nauki aktuarialne\\ +%11.9 Inne nauki matematyczne i informatyczne +} + +%Klasyfikacja tematyczna wedlug AMS (matematyka) lub ACM (informatyka) +\klasyfikacja{% TODO + D. Software\\ + D.127. Blabalgorithms\\ + D.127.6. Numerical blabalysis} + +% Słowa kluczowe: +\keywords{MSO, query answering, trees, finite state automata, tree automata} + +% Tu jest dobre miejsce na Twoje własne makra i~środowiska: +\newtheorem{defi}{Definicja}[section] + +% koniec definicji + +\begin{document} +\maketitle + +%tu idzie streszczenie na strone poczatkowa +\begin{abstract} + % TODO +\end{abstract} + +\tableofcontents +%\listoffigures +%\listoftables + +\chapter*{Introduction} +\addcontentsline{toc}{chapter}{Introduction} + +\chapter{Preliminaries}\label{r:pojecia} + +% \begin{quote} + % Blaba, który jest blaba, nie jest prawdziwym blaba. + +% \raggedleft\slshape tłum. z~chińskiego Robert Blarbarucki +% \end{quote} + +\section{Definitions} + +\chapter{Branch Infix Regular Queries}\label{r:branchinfix} + +\section{Word infix regular queries} + +There is a simple and well-known data structure that, given a fixed automaton +$A$, preprocesses an input word $w$ in time $O(n)$, and afterwords is able to +answer in time $O(1)$ questions of the form ``Is the infix $w[i, j]$ accepted by +$A$?''. We present the full construction as we will be generalizing its +internals for the tree case. + +Suppose $A$ is deterministic. We begin by replacing each letter of $w$ with the +set of states of $A$, $Q$. + +Each letter $a$ of $w$ defines an injective function on $Q$, and we draw these +functions as directed edges between successive copies of $Q$. For example, if +$A$ in state $q$, reading $a$, would move to state $q'$, then, for any copy of +$Q$ corresponding to an instance of $a$ in the original word, there will be an +edge from $q$ there to $q'$ in the successive copy of $Q$. + +Now we will color the vertices of the graph we just constructed with the colors +$1, 2, \ldots, |Q|$ in such a way that + +\begin{enumerate} + \item every copy of $Q$ has one vertex of each of the $|Q|$ colors; + \item when a vertex of color $i$ has an edge to a vertex of color $j$ in a + successive copy of $Q$, then $i \geq j$. +\end{enumerate} + +The second point basically means that we will be trying to draw single-color +paths, but when paths need to join, it is the higher-colored path that gets cut +off. + +The construction is as follows: + +\begin{enumerate} + \item Color an arbitrary vertex of the first copy of $Q$ with the color $1$. + \item Follow the deterministic edges to the end of the word, coloring all + vertices along this path with $1$. + \item Now color another uncolored vertex of the first copy of $Q$ with the + color $2$. + \item Try following edges as far as possible, coloring all vertices with + $2$. + \item If your run into an already colored vertex, pick an arbitrary + uncolored vertex in this copy of $Q$ to color with $2$ and continue from + here. + \item Repeat steps 3.-5. for each successive color up to $|Q|$. +\end{enumerate} + +Additionally, in each vertex, we store the index of the next copy of $Q$ in +which the path of this vertex's color is broken by a lower color. + +\section{Least Common Ancestor via Range Minimum Query} + +\section{Highest Black Descendant on Path} + +We can now reduce the problem to the following: + +\emph{Given}: a tree $T$ with distinguished black vertices. + +\emph{Queries}: given a vertex $x$, its descendant $y$, find the node $z$ that +is the highest black node on the path between $x$ and $y$. + +We will build an index structure, constructible in linear time, that allows us +to handle such queries in constant time. The structure is heavily inspired by +Schieber and Vishkin's structure for computing LCA queries. We will present our +version of the structue in full detail, using Schieber and Vishkin's notation +where possible. + +\subsection{High level overview} + +The LCA algorithm given by Schieber and Vishkin bases on two observations, first +made by Harel and Tarjan: + +\begin{itemize} + \item LCA queries can be computed in constant time on \emph{simple paths}. + \item LCA queries can be computed in constant time on \emph{full binary trees}. +\end{itemize} + +We map vertices of our tree $T$ to vertices of a full binary tree $B$ of size +$O(n)$ that has two properties: + +\begin{enumerate} + \item The induced subgraph of all vertices mapped to the same node of $B$ is + a simple path in $T$. + \item If a vertex $v$ of $T$ is mapped to a vertex $b$ of $B$, all of its + descendants are mapped to descendants of $b$ (where $b$ is also a + descendant of itself). +\end{enumerate} + +From 2. we get that also all of $v$'s ancestors are mapped to $b$'s ancestors in +$B$, in particular they are all mapped into at most $\log(n)$ nodes of $B$. + +\section{Generalizing words to trees} + +\chapter{Conclusions} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% coding: latin-2 +%%% End: -- cgit v1.2.3