// my_simple_cat.ts

import { readLines } from "https://deno.land/std/io/mod.ts";

// Echo stdin to stdout one line at a time. console.log always terminates
// each line with "\n", so the input's original line endings are not kept.
const stdinLines = readLines(Deno.stdin);
for await (const text of stdinLines) {
  console.log(text);
}

$ cat my_simple_cat.ts | deno run my_simple_cat.ts | cat -A
import { readLines } from "https://deno.land/std/io/mod.ts";$
$
for await (let line of readLines(Deno.stdin)) {$
  console.log(line);$
}$
$

改行を変化させないようにしたもの

上記の簡易版では改行の情報が失われて困るので、そうならないようにしたもの。

改行が LF または CRLF であることを前提にしています。
文字エンコーディングが UTF-8 であることを前提にしています。
パフォーマンスについては調べていません。もっと効率のよい書き方はあると思います。
- Deno.Buffer とか使うとよい？

// my_cat.ts

// Shared UTF-8 codecs: the decoder turns input bytes into text and the
// encoder turns output text back into bytes.
const textDecoder = new TextDecoder();
const textEncoder = new TextEncoder();
// Byte value of the line feed character ("\n").
const LF = 0x0a;

/**
 * Growable accumulator for the raw bytes of one line of input.
 */
class ByteBuffer {
  /** Byte values collected so far, in arrival order. */
  bytes: number[] = [];

  /** Appends a single byte value to the end of the buffer. */
  push(val: number) {
    this.bytes.push(val);
  }

  /**
   * Decodes every byte collected so far with the module-level
   * `textDecoder` and returns the resulting text.
   */
  toLine() {
    const raw = Uint8Array.from(this.bytes);
    return textDecoder.decode(raw);
  }
}

/**
 * Reads stdin in chunks and hands complete lines to a callback.
 *
 * A line ends at an LF byte and is delivered *including* its terminator
 * (so a preceding CR, if any, is preserved). Bytes are decoded only once a
 * whole line has been collected, so a multi-byte character that straddles
 * two reads is never decoded in halves.
 */
class StdinReader {
  /** Bytes of the line currently being assembled. */
  buf: ByteBuffer;

  /** Scratch area reused by every read instead of allocating per call. */
  private readonly chunk = new Uint8Array(1024);

  constructor() {
    this.buf = new ByteBuffer();
  }

  /**
   * Performs one read from stdin and emits every line completed by it.
   *
   * @param fn Called once per completed line (terminator included).
   * @returns The number of bytes read, or `null` at end of input.
   */
  async read(
    fn: (line: string) => void,
  ): Promise<number | null> {
    const numRead = await Deno.stdin.read(this.chunk);
    if (numRead === null) {
      return null;
    }

    for (let i = 0; i < numRead; i++) {
      const val = this.chunk[i];
      this.buf.push(val);

      if (val === LF) {
        fn(this.buf.toLine());
        this.buf = new ByteBuffer();
      }
    }

    return numRead;
  }

  /**
   * Reads stdin to the end, calling `fn` for every line.
   *
   * A final line without a trailing LF is still delivered. Nothing is
   * delivered for an empty remainder, so input that is empty or ends with
   * a newline does not produce an extra empty-string callback.
   *
   * @param fn Called once per line (terminator included when present).
   * @returns Always `null`.
   */
  async eachLine(fn: (line: string) => void): Promise<null> {
    while ((await this.read(fn)) !== null) {
      // read() has already dispatched the lines it completed.
    }

    if (this.buf.bytes.length > 0) {
      fn(this.buf.toLine());
      // Reset so the remainder cannot be emitted a second time.
      this.buf = new ByteBuffer();
    }

    return null;
  }
}

/**
 * Writes `str` to stdout as UTF-8 without appending a newline.
 *
 * `writeSync` returns how many bytes it actually wrote and may accept only
 * part of the buffer, so keep writing the remainder until all of it is out.
 *
 * @throws Error if a write reports no progress, to avoid spinning forever.
 */
const print = (str: string) => {
  const bytes = textEncoder.encode(str);
  let offset = 0;
  while (offset < bytes.length) {
    const written = Deno.stdout.writeSync(bytes.subarray(offset));
    if (written <= 0) {
      throw new Error("stdout write made no progress");
    }
    offset += written;
  }
};

// Entry point: copy every line of stdin to stdout unchanged.
const stdinReader = new StdinReader();
stdinReader.eachLine(print);

エンコーディングが UTF-8
改行が CRLF
ファイル末尾の改行あり

なファイルで確認:

$ export PS1='--------\n$ '
--------
$ cat end_with_newline.txt 
あいうえお
aa
bb
--------
$ cat -A end_with_newline.txt 
M-cM-^AM-^BM-cM-^AM-^DM-cM-^AM-^FM-cM-^AM-^HM-cM-^AM-^J^M$
aa^M$
bb^M$
--------
$ cat end_with_newline.txt | deno run my_cat.ts | cat -A
M-cM-^AM-^BM-cM-^AM-^DM-cM-^AM-^FM-cM-^AM-^HM-cM-^AM-^J^M$
aa^M$
bb^M$
--------
$

エンコーディングが UTF-8
改行が CRLF
ファイル末尾の改行なし（ここだけ上のと違う）

なファイルで確認:

$ export PS1='--------\n$ '
--------
$ cat end_without_newline.txt 
あいうえお
aa
bb--------
$ cat -A end_without_newline.txt 
M-cM-^AM-^BM-cM-^AM-^DM-cM-^AM-^FM-cM-^AM-^HM-cM-^AM-^J^M$
aa^M$
bb--------
$ cat end_without_newline.txt | deno run my_cat.ts | cat -A
M-cM-^AM-^BM-cM-^AM-^DM-cM-^AM-^FM-cM-^AM-^HM-cM-^AM-^J^M$
aa^M$
bb--------
$

バージョン

$ deno -V
deno 1.2.0

参考

この記事を読んだ人はこちらも（たぶん）読んでいます

memo88.hatenablog.com

2020-07-11

Kotlin: 標準入力を読んで行ごとに処理

Kotlin

簡易版

お手軽に済ませたいならこれでよいっぽい。

readLine - Kotlin Programming Language

// Cat1.kts

// Echo stdin line by line; readLine() returns null at end of input.
while (true) {
    val text = readLine() ?: break
    println(text)
}

$ cat Cat1.kts | kotlin Cat1.kts | cat -A
while (true) {$
    val line : String? = readLine()$
    if (line == null) {$
        break$
    }$
    println(line)$
}$

改行を変化させない＋エンコーディング指定版

上記の簡易版では改行の情報が失われて困るので、そうならないようにしたもの。

改行が LF または CRLF であることを前提にしています。
パフォーマンスについては調べていません。もっと効率のよい書き方はあると思います。BufferedReader 使うとか。

// Cat2.kts

import java.nio.charset.StandardCharsets
import java.io.InputStreamReader
import java.io.OutputStreamWriter

// Copy stdin to stdout as UTF-8, one line at a time, keeping each line's
// terminator exactly as it was read.
InputStreamReader(System.`in`, StandardCharsets.UTF_8).use { reader ->
    OutputStreamWriter(System.out, StandardCharsets.UTF_8).use { writer ->
        val pending = StringBuilder()
        var code = reader.read()
        while (code != -1) {
            val ch = code.toChar()
            pending.append(ch)
            if (ch == '\n') {
                // A full line (terminator included) is ready: emit it.
                writer.write(pending.toString())
                pending.setLength(0)
            }
            code = reader.read()
        }
        // Whatever is left is a final line with no trailing newline.
        writer.write(pending.toString())
    }
}

エンコーディングが UTF-8
改行が CRLF
ファイル末尾の改行あり

なファイルで確認（区切り線っぽく見える ----... はプロンプトの一部）:

--------------------------------
$ cat end_with_newline.txt 
あ
aa
bb
--------------------------------
$ cat -A end_with_newline.txt 
M-cM-^AM-^B^M$
aa^M$
bb^M$
--------------------------------
$ cat end_with_newline.txt | kotlin Cat2.kts | cat -A
M-cM-^AM-^B^M$
aa^M$
bb^M$
--------------------------------
$

エンコーディングが UTF-8
改行が CRLF
ファイル末尾の改行なし（ここだけ上のと違う）

なファイルで確認:

--------------------------------
$ cat end_without_newline.txt 
あ
aa
bb--------------------------------
$ cat -A end_without_newline.txt 
M-cM-^AM-^B^M$
aa^M$
bb--------------------------------
$ cat end_without_newline.txt | kotlin Cat2.kts | cat -A
M-cM-^AM-^B^M$
aa^M$
bb--------------------------------
$

バージョン

$ kotlin -version
Kotlin version 1.3.72-release-468 (JRE 1.8.0_252-8u252-b09-1~18.04-b09)

この記事を読んだ人は（ひょっとしたら）こちらも読んでいます

memo88.hatenablog.com

2020-07-11

四則演算と剰余のみのexprコマンドをKotlinで作ってみた

Kotlin

Kotlin に入門しています。

まずは何か適当なものを作りながら慣れようということで、四則演算と剰余のみのexprコマンドをRubyで作ってみたを移植してみました。手書きの再帰下降パーサです。

# (100 - 2 - 1) / (1 + 2) % 5 * 3
# => 97 / 3 % 5 * 3
# => 32 % 5 * 3
# => 2 * 3
# => 6

## 実行の例
$ kotlin MyExpr.kts -- \( 100 - 2 - 1 \) / \( 1 + 2 \) % 5 \* 3
6

## 確認のため同じ引数で expr コマンドを実行
$ expr \( 100 - 2 - 1 \) / \( 1 + 2 \) % 5 \* 3
6

// MyExpr.kts

/** Binary operators of the expression language, each with its source symbol. */
enum class Op(val symbol: String) {
    ADD("+"),
    SUB("-"),
    MUL("*"),
    DIV("/"),
    MOD("%")
}

/** Base type of every syntax-tree node; each node evaluates to an Int. */
abstract class Node {
    /** Computes the integer value of this node. */
    abstract fun eval(): Int
}

/** Leaf node holding an integer literal. */
class NumberNode(val n: Int) : Node() {
    /** Returns the literal value unchanged. */
    override fun eval(): Int {
        return n
    }
}

/**
 * Interior node that applies a binary operator to two sub-expressions.
 *
 * The left operand is evaluated before the right one. Results that do not
 * fit in an Int raise ArithmeticException instead of silently wrapping
 * around; division or remainder by zero also raises ArithmeticException.
 */
class BinopNode (
    val op: Op,
    val left: Node,
    val right: Node
) : Node() {
    override fun eval(): Int {
        val lhs = left.eval()
        val rhs = right.eval()
        return when (op) {
            Op.ADD -> Math.addExact(lhs, rhs)
            Op.SUB -> Math.subtractExact(lhs, rhs)
            Op.MUL -> Math.multiplyExact(lhs, rhs)
            // Int.MIN_VALUE / -1 is the only quotient that overflows;
            // x / -1 equals -x, and negateExact detects that case.
            Op.DIV -> if (rhs == -1) Math.negateExact(lhs) else lhs / rhs
            Op.MOD -> lhs % rhs
        }
    }
}

/**
 * Hand-written recursive-descent parser for expr-style integer arithmetic.
 *
 * Every element of [tokens] is one token. Grammar:
 *
 *     expression := additive
 *     additive   := multiply (("+" | "-") multiply)*
 *     multiply   := factor (("*" | "/" | "%") factor)*
 *     factor     := "(" expression ")" | number
 *     number     := "-"? digit+
 *
 * All malformed input (truncated expression, missing ")", bad or
 * out-of-range number, leftover tokens) is reported as [ParseException].
 */
class Parser (val tokens: List<String>) {
    val NUMERIC_CHARS = setOf('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')

    /** Index into [tokens] of the next token to be consumed. */
    var cur = 0

    // --------------------------------

    /** Raised when the token list is not a valid expression. */
    class ParseException(msg: String) : RuntimeException(msg)

    /** True once every token has been consumed. */
    private fun atEnd(): Boolean = this.tokens.size <= this.cur

    /**
     * Returns the token at the cursor without consuming it.
     *
     * @throws ParseException if the cursor is past the last token.
     */
    fun currentToken(): String {
        if (atEnd()) {
            throw ParseException("unexpected end of input")
        }
        return this.tokens.get(this.cur)
    }

    /** True when the next token is "+" or "-"; false at end of tokens. */
    fun isAdditive(): Boolean {
        return !atEnd() && setOf("+", "-").contains(currentToken())
    }

    /** True when the next token is "*", "/" or "%"; false at end of tokens. */
    fun isMultiply(): Boolean {
        return !atEnd() && setOf("*", "/", "%").contains(currentToken())
    }

    /**
     * Consumes the next token if it equals [token].
     *
     * @param exception when true, a mismatch (or end of input) throws
     *   [ParseException] instead of returning false.
     * @return true if the token was consumed, false otherwise.
     */
    fun consume(token: String, exception: Boolean = false): Boolean {
        if (!atEnd() && currentToken() == token) {
            this.cur += 1
            return true
        }
        if (exception) {
            val got = if (atEnd()) "end of input" else "<${currentToken()}>"
            throw ParseException("expected <${token}> / got ${got}")
        }
        return false
    }

    // --------------------------------

    /**
     * Parses the whole token list into a syntax tree.
     *
     * @throws ParseException if the input is malformed or if tokens remain
     *   after a complete expression (e.g. `1 2` or `1 )`).
     */
    fun parse(): Node {
        val node = parseExpression()
        if (!atEnd()) {
            throw ParseException("unexpected token <${currentToken()}>")
        }
        return node
    }

    fun parseExpression(): Node = parseAdditive()

    /** additive := multiply (("+" | "-") multiply)*, left-associative. */
    fun parseAdditive(): Node {
        var node = parseMultiply()

        while (isAdditive()) {
            val (op, multiply) = parseAdditiveTail()
            node = BinopNode(op, node, multiply)
        }

        return node
    }

    /** Parses one `("+" | "-") multiply` tail and returns its parts. */
    fun parseAdditiveTail(): Pair<Op, Node> {
        val op =
            when {
                consume("+") -> Op.ADD
                consume("-") -> Op.SUB
                else -> {
                    throw ParseException("expected '+' or '-' / got <${currentToken()}>")
                }
            }

        return Pair(op, parseMultiply())
    }

    /** multiply := factor (("*" | "/" | "%") factor)*, left-associative. */
    fun parseMultiply(): Node {
        var node = parseFactor()

        while (isMultiply()) {
            val (op, factor) = parseMultiplyTail()
            node = BinopNode(op, node, factor)
        }

        return node
    }

    /** Parses one `("*" | "/" | "%") factor` tail and returns its parts. */
    fun parseMultiplyTail(): Pair<Op, Node> {
        val op =
            when {
                consume("*") -> Op.MUL
                consume("/") -> Op.DIV
                consume("%") -> Op.MOD
                else -> {
                    throw ParseException("expected '*', '/' or '%' / got <${currentToken()}>")
                }
            }

        return Pair(op, parseFactor())
    }

    /** factor := "(" expression ")" | number */
    fun parseFactor(): Node {
        if (consume("(")) {
            val exp = parseExpression()
            consume(")", true)
            return exp
        } else {
            return parseNumber()
        }
    }

    /**
     * Consumes the next token as an integer literal.
     *
     * @throws ParseException if there is no token, the token is not an
     *   optionally negated run of digits, or its value does not fit in an Int.
     */
    fun parseNumber(): NumberNode {
        val token = currentToken()
        this.cur += 1
        if (!isNumber(token)) {
            throw ParseException("invalid number (${token})")
        }
        try {
            return NumberNode(
                Integer.valueOf(token)
            )
        } catch (e: NumberFormatException) {
            // Digits only, but the value is outside the Int range.
            throw ParseException("invalid number (${token})")
        }
    }

    /**
     * True when [token] is an optional leading '-' followed by at least one
     * ASCII digit. An empty token and a bare "-" are not numbers.
     */
    fun isNumber(token: String): Boolean {
        val digits = if (token.startsWith("-")) token.substring(1) else token
        return digits.isNotEmpty() && digits.all { NUMERIC_CHARS.contains(it) }
    }
}

// --------------------------------

// Entry point: each command-line argument is one token. Parse them into a
// tree, evaluate it, and print the resulting integer.
val argTokens = args.toList()
val syntaxTree = Parser(argTokens).parse()
println(syntaxTree.eval())

四則演算と剰余のみのexprコマンドをRubyで作ってみた

Ruby

Zenn に引っ越しました。

zenn.dev

2020-06-28

hive.server2.enable.doAs がよく分からなかったので Apache Bigtop で調べてみた

Hive

hive.server2.enable.doAs の設定によって何がどう変わるかよく分からなかったので Apache Bigtop を使って調べてみました。

hive.server2.enable.doAs だと長くて煩雑なので以下では適宜 doAs と略しています。

まとめ
バージョンなど
一応公式の説明
調査1: doAsの設定による違い
調査2: OSのユーザとproxy userの関係

まとめ

先にまとめ。調べた結果を基に書いていますが、厳密な裏付けはありません（ソースを読んで調べたりはしていません）。

proxy user というしくみがある
- Apache Hadoop 2.8.5 – Proxy user - Superusers Acting On Behalf Of Other Users
- hiveserver2 のプロセスを実行しているユーザではなく、他のユーザになりすましてジョブの submit や HDFS へのアクセスを行うしくみ
- （Hive ではなく）Hadoop の機能
  - core-site.xml の hadoop.proxyuser.〜 で設定する
- hive.server2.enable.doAs が false の場合、なりすましを行わず、 hiveserver2 のプロセスを実行しているユーザで操作が実行される
なりすましを行うには、下記の両方が必要
- OSのユーザとして存在している
- core-site.xml の proxyuser の設定に対象ユーザが含まれている（※1）
beeline の場合は、なりすましたいユーザ名を -n オプションで指定する
なりすましを行うと、HDFS上で作られるデータベースやテーブルのディレクトリ、データファイルの所有者がそのユーザになる

hive.server2.enable.doAs で設定しているのは要するに何なのかということで言えば、「Hadoop の proxy user 機能を利用するかどうか」と思ってよさそうな挙動でした。なので、利用する場合は proxy user についても知る必要があります。

※1 Apache Hadoop 2.8.5 – Proxy user - Superusers Acting On Behalf Of Other Users によれば、ホストによる指定、グループによる指定、ユーザによる指定を組み合わせて指定できるようです。

バージョンなど

Bigtop は下記の時点の master を使っています。

34e0bd7182c713b16dce9a4bdc803c8ed7fb9eb3
Thu Jun 11 09:01:26 2020 +0000

Hadoop 2.8.5
Hive 2.3.3

一応公式の説明

Setting Up HiveServer2 - Apache Hive - Apache Software Foundation

Impersonation

By default HiveServer2 performs the query processing as the user who submitted the query. But if the following parameter is set to false, the query will run as the user that the hiveserver2 process runs as.

hive.server2.enable.doAs – Impersonate the connected user, default true.

impersonate は「なりすます」という意味。エラーメッセージでも出てきます。

調査1: doAsの設定による違い

準備

設定を変えてプロビジョニングしなおすのを繰り返すやり方だと時間がかかってしまうので、Bigtop のリポジトリをクローンしたディレクトリを2つ用意して true/false それぞれの設定にします。

デフォルトでは true なので、false の方のディレクトリのみ hive-site.xml を修正。

--- a/bigtop-deploy/puppet/modules/hadoop_hive/templates/hive-site.xml
+++ b/bigtop-deploy/puppet/modules/hadoop_hive/templates/hive-site.xml
@@ -81,7 +81,7 @@
 
 <property>
    <name>hive.server2.enable.doAs</name>
-   <value>true</value>
+   <value>false</value>
 </property>
 
 <property>

config_centos-7.yaml を修正して Hive コンポーネントを追加。

--- a/provisioner/docker/config_centos-7.yaml
+++ b/provisioner/docker/config_centos-7.yaml
@@ -19,6 +19,6 @@ docker:
 
 repo: "http://repos.bigtop.apache.org/releases/1.4.0/centos/7/$basearch"
 distro: centos
-components: [hdfs, yarn, mapreduce]
+components: [hdfs, yarn, mapreduce, hive]
 enable_local_repo: false
 smoke_test_components: [hdfs, yarn, mapreduce]

設定の変更はこれだけ。

単一ノードで create します。

time ./docker-hadoop.sh -C config_centos-7.yaml --create 1

これで Hadoop と Hive が使えるようになります。 Bigtop すばらしい……ありがたや……。

コンテナに入る。

./docker-hadoop.sh --exec 1 bash

以下、コンテナ内の作業。

testuser というユーザがすでに存在しているので、そっちにスイッチします。（こういう用途で使うために用意されているものなのか分かっていませんが、とりあえず一般ユーザのつもりで使います）

su - testuser

接続

beeline で hiveserver2 に接続

beeline -u "jdbc:hive2://localhost:10000"

doAs=false の場合: 接続に成功する。
doAs=true の場合: 接続に失敗して次のようなメッセージが出ます（適宜改行を加えています）。

Error: Could not open client transport with JDBC Uri: 
jdbc:hive2://localhost:10000: Failed to open new session: 
java.lang.RuntimeException: 
org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.authorize.AuthorizationException): 
User: hive is not allowed to impersonate anonymous (state=08S01,code=0)

この場合は -n オプションでユーザ名を指定すると接続できるようになります。

beeline -u "jdbc:hive2://localhost:10000" -n testuser

ちなみに、Bigtop の既定の設定では hiveserver2 は hive ユーザで実行されますが、hiveserver2 を root ユーザで実行すると次のようなメッセージになります。

User: root is not allowed to impersonate anonymous (state=08S01,code=0)
      ^^^^
      ここが変わる

「hiveserver2 の実行ユーザが他のユーザになりすます」ということが試みられているようです。

OSに存在しないユーザ名を指定した場合

beeline -u "jdbc:hive2://localhost:10000" -n nobody

doAs=false の場合: 接続に成功する。
doAs=true の場合: 接続に失敗する。

doAs=false の場合、 -n オプションによる指定はいずれにせよ無視されるということでしょうか。

create database

doAs=true の場合

$ beeline -u "jdbc:hive2://localhost:10000" -n testuser

create database test_db1;
  => 成功する

$ hdfs dfs -ls /user/hive/warehouse
Found 1 items
drwxrwxrwx   - testuser hadoop          0 2020-06-27 05:38 /user/hive/warehouse/test_db1.db

所有者＝testuser でデータベースのディレクトリが作られました。

doAs=false の場合

一応 -n testuser を付けてみます。

$ beeline -u "jdbc:hive2://localhost:10000" -n testuser

create database test_db1;
  => 成功する

$ hdfs dfs -ls /user/hive/warehouse
Found 1 items
drwxrwxrwx   - hive hadoop          0 2020-06-27 05:41 /user/hive/warehouse/test_db1.db

やはり -n の指定は無視され、所有者＝hive でディレクトリが作られました。

create table + insert

doAs=true の場合

$ beeline -u "jdbc:hive2://localhost:10000" -n testuser

use test_db1;
create table test1 (name string);
insert into test1 values ('foo'), ('bar');

Error: org.apache.hive.service.cli.HiveSQLException: Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask. Permission denied: user=testuser, access=WRITE, inode="/user":hdfs:hadoop:drwxr-xr-x

insert 時に /user のパーミッションで怒られました。ちなみに test1 テーブルは所有者＝testuser で作られています。

$ hdfs dfs -ls /user/hive/warehouse/test_db1.db
Found 1 items
drwxrwxrwx   - testuser hadoop          0 2020-06-27 09:48 /user/hive/warehouse/test_db1.db/test1

hdfs:///user に書き込み権限を付けて再度 insert。

# sudo -u hdfs hdfs dfs -chmod 777 /user
$ beeline -u "jdbc:hive2://localhost:10000" -n testuser

use test_db1;
insert into test1 values ('foo'), ('bar');

今度は成功しました。

$ hdfs dfs -ls /user/hive/warehouse/test_db1.db/test1
Found 1 items
-rwxrwxrwx   3 testuser hadoop          8 2020-06-27 09:55 /user/hive/warehouse/test_db1.db/test1/000000_0

$ hdfs dfs -text /user/hive/warehouse/test_db1.db/test1/000000_0
foo
bar

データは所有者＝testuser で作られています。

doAs=false の場合

$ beeline -u "jdbc:hive2://localhost:10000" -n testuser

use test_db1;
create table test1 (name string);
insert into test1 values ('foo'), ('bar');

こちらはエラーになりませんでした。

$ hdfs dfs -ls /user/hive/warehouse/test_db1.db
Found 1 items
drwxrwxrwx   - hive hadoop          0 2020-06-27 09:58 /user/hive/warehouse/test_db1.db/test1

$ hdfs dfs -ls /user/hive/warehouse/test_db1.db/test1
Found 1 items
-rwxrwxrwx   3 hive hadoop          8 2020-06-27 09:58 /user/hive/warehouse/test_db1.db/test1/000000_0

テーブルのディレクトリとデータは所有者＝hive で作られています。

create external table + insert

doAs=true の場合

table_ext というテーブルを作り、 /user/testuser の下にデータを置くことにします。

$ hdfs dfs -ls /user
...
drwx------   - testuser hadoop          0 2020-06-27 09:54 /user/testuser
...

beeline -u "jdbc:hive2://localhost:10000" -n testuser

use test_db1;
create external table table_ext (name string)
  location '/user/testuser/table_ext/';
insert into table table_ext values ('foo'), ('bar');

$ hdfs dfs -ls /user/testuser/
Found 2 items
drwx------   - testuser hadoop          0 2020-06-27 10:22 /user/testuser/.staging
drwx------   - testuser hadoop          0 2020-06-27 10:22 /user/testuser/table_ext


$ hdfs dfs -ls /user/testuser/table_ext
Found 1 items
-rwx------   3 testuser hadoop          8 2020-06-27 10:22 /user/testuser/table_ext/000000_0

$ hdfs dfs -text /user/testuser/table_ext/000000_0
foo
bar

doAs=false の場合

こちらは hdfs:///user/testuser/ ディレクトリが存在しなかったので、 hdfs:///tmp/ の下にデータを置くことにします。

beeline -u "jdbc:hive2://localhost:10000" -n testuser

use test_db1;
create external table table_ext (name string)
  location '/tmp/table_ext/';
insert into table table_ext values ('foo'), ('bar');

$ hdfs dfs -ls /tmp/
Found 3 items
drwxrwxrwx   - mapred mapred          0 2020-06-27 04:55 /tmp/hadoop-yarn
drwx-wx-wx   - hive   hadoop          0 2020-06-27 04:56 /tmp/hive
drwxrwxrwt   - hive   hadoop          0 2020-06-27 10:29 /tmp/table_ext

$ hdfs dfs -ls /tmp/table_ext
Found 1 items
-rwxrwxrwt   3 hive hadoop          8 2020-06-27 10:29 /tmp/table_ext/000000_0

$ hdfs dfs -text /tmp/table_ext/000000_0
foo
bar

調査2: OSのユーザとproxy userの関係

doAs=false の場合の挙動はなんとなく分かってきましたが、 doAs=true の場合のOSのユーザとの関係がよく分からないので、こんどはそこを調べてみます。

Hadoop の proxy user というしくみが関わっているようだったので、次の3パターンでどうなるか試します。

user_os: OS のユーザのみ存在
user_proxyuser: proxy user の設定のみ
user_both: OS のユーザが存在し、かつ proxy user の設定もあり
- おそらく testuser と同等

データの配置場所が変わるだけだと思われたので外部テーブルについては省略。

設定ファイルを修正。下記は master からの差分です。 Puppet に詳しくないので、 testuser を grep したりして当たりを付けて適当に修正しました。

--- a/bigtop-deploy/puppet/manifests/cluster.pp
+++ b/bigtop-deploy/puppet/manifests/cluster.pp
@@ -159,7 +159,7 @@ $roles_map = {
 
 class hadoop_cluster_node (
   $hadoop_security_authentication = hiera("hadoop::hadoop_security_authentication", "simple"),
-  $bigtop_real_users = [ 'jenkins', 'testuser', 'hudson' ],
+  $bigtop_real_users = [ 'jenkins', 'testuser', 'hudson', 'user_os', 'user_both' ],
   $cluster_components = ["all"]
   ) {

--- a/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp
+++ b/bigtop-deploy/puppet/modules/hadoop/manifests/init.pp
@@ -20,7 +20,7 @@ class hadoop ($hadoop_security_authentication = "simple",
   $hadoop_storage_dirs = split($::hadoop_storage_dirs, ";"),
   $proxyusers = {
     oozie => { groups => 'hudson,testuser,root,hadoop,jenkins,oozie,hive,httpfs,users', hosts => "*" },
-     hive => { groups => 'hudson,testuser,root,hadoop,jenkins,oozie,hive,httpfs,users', hosts => "*" },
+     hive => { groups => 'hudson,testuser,user_both,user_proxyuser,root,hadoop,jenkins,oozie,hive,httpfs,users', hosts => "*" },
    httpfs => { groups => 'hudson,testuser,root,hadoop,jenkins,oozie,hive,httpfs,users', hosts => "*" } },
   $generate_secrets = false,
   $kms_host = undef,

--- a/provisioner/docker/config_centos-7.yaml
+++ b/provisioner/docker/config_centos-7.yaml
@@ -19,6 +19,6 @@ docker:
 
 repo: "http://repos.bigtop.apache.org/releases/1.4.0/centos/7/$basearch"
 distro: centos
-components: [hdfs, yarn, mapreduce]
+components: [hdfs, yarn, mapreduce, hive]
 enable_local_repo: false
 smoke_test_components: [hdfs, yarn, mapreduce]

あと、調査1のときと同様に hdfs:///user のパーミッションを変更しておきます。

# hdfs dfs -ls / | grep /user
drwxr-xr-x   - hdfs  hadoop          0 2020-06-28 03:13 /user
# sudo -u hdfs hdfs dfs -chmod 777 /user

OSユーザあり、proxy user 設定なし

# su - user_os

$ beeline -u "jdbc:hive2://localhost:10000" -n user_os

Error: Could not open client transport with JDBC Uri: jdbc:hive2://localhost:10000: Failed to open new session: java.lang.RuntimeException: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.authorize.AuthorizationException): User: hive is not allowed to impersonate user_os (state=08S01,code=0)

接続できない。

OSユーザなし、proxy user 設定あり

testuser を使います。

# su - testuser

$ beeline -u "jdbc:hive2://localhost:10000" -n user_proxyuser

Error: Could not open client transport with JDBC Uri: jdbc:hive2://localhost:10000: Failed to open new session: java.lang.RuntimeException: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.authorize.AuthorizationException): User: hive is not allowed to impersonate user_proxyuser (state=08S01,code=0)

接続できない。

OSユーザあり、proxy user 設定あり

# su - user_both
$ beeline -u "jdbc:hive2://localhost:10000" -n user_both

create database user_both_db;
use user_both_db;
create table table1 (name string);
insert into table1 values ('foo'), ('bar');

insert まで成功しました。

$ hdfs dfs -ls /user/hive/warehouse/
Found 1 items
drwxrwxrwx   - user_both hadoop          0 2020-06-28 03:19 /user/hive/warehouse/user_both_db.db

$ hdfs dfs -ls /user/hive/warehouse/user_both_db.db/
Found 1 items
drwxrwxrwx   - user_both hadoop          0 2020-06-28 03:22 /user/hive/warehouse/user_both_db.db/table1

$ hdfs dfs -ls /user/hive/warehouse/user_both_db.db/table1
Found 1 items
-rwxrwxrwx   3 user_both hadoop          8 2020-06-28 03:22 /user/hive/warehouse/user_both_db.db/table1/000000_0

それぞれ user_both で作られています。 testuser と同等の操作ができるようです。

OSユーザあり、proxy user 設定なし / グループのみ変更

user_os の場合接続の時点で失敗しましたが、 user_os を proxy user で設定されているグループに所属させるとどうでしょうか。ためしに users というグループでやってみます。

# id user_os
uid=1000(user_os) gid=1000(user_os) groups=1000(user_os)

# gpasswd -a user_os users
Adding user user_os to group users

# id user_os
uid=1000(user_os) gid=1000(user_os) groups=1000(user_os),100(users)

# su - user_os

$ beeline -u "jdbc:hive2://localhost:10000" -n user_os
  => OK

接続できました。 insert までやってみます。

create database user_os_db;
use user_os_db;
create table table1 (name string);
insert into table1 values ('foo'), ('bar');

$ hdfs dfs -ls /user/hive/warehouse/
Found 2 items
drwxrwxrwx   - user_both hadoop          0 2020-06-28 03:19 /user/hive/warehouse/user_both_db.db
drwxrwxrwx   - user_os   hadoop          0 2020-06-28 03:37 /user/hive/warehouse/user_os_db.db

$ hdfs dfs -ls /user/hive/warehouse/user_os_db.db/
Found 1 items
drwxrwxrwx   - user_os hadoop          0 2020-06-28 03:37 /user/hive/warehouse/user_os_db.db/table1

$ hdfs dfs -ls /user/hive/warehouse/user_os_db.db/table1
Found 1 items
-rwxrwxrwx   3 user_os hadoop          8 2020-06-28 03:37 /user/hive/warehouse/user_os_db.db/table1/000000_0

所有者＝user_os で作られました。なるほど。

2020-06-27

vm2gol v2 （43）フォーマットなどいろいろ修正

vm2gol-v2

前回 test ディレクトリを作ったので test_vgparser.rb をそちらに移動
各ステップで使っていたソースファイルを steps ディレクトリに移動
- 最初から細かくディレクトリを分けるのはあまり好みではないのですが、さすがに数が多くなってきたので
テストが2つになったので Rakefile を追加して rake test でテストを実行できるようにした
- ここまで標準ライブラリは利用しつつも追加 gem なしの縛りでやってきましたが、コンパイラまでできたので、ここから先はそんなにこだわらなくてもいいかなと
- とはいえ、次に挙げる Rubocop も含めてまだ補助ツールの範疇です（コンパイラ〜VM を動かすのに必須ではない）
Rubocop に従ってフォーマットなどの修正
- いろいろと雑なので……
- 一応人の目を気にして
  - 読まれてる気配はそんなにないですが
- すぐ修正できる細かいものだけ。修正量が大きくなりそうなものは後回し。
- ホビープロジェクトなので、気楽さが失われない程度に
- パフォーマンスまわりは優先度低いのでひとまず無効に
  - 読みやすさ・理解しやすさ・書きやすさの方を優先
z_* を .gitignore に追加
- 趣味プロジェクトなので適当です
- 最近は（趣味プロジェクトでは）これでファイルもディレクトリも雑に無視しています

目次ページに戻る / 前 / 次

2020-06-20

vm2gol v2 （42）ライフゲームのテスト

vm2gol-v2

これまでずっと、テスト書かなきゃなーとは思っていたのですが、書かないままズルズルとここまで来てしまいました。いいかげん書かないと。

これまで（第38〜40回あたり）はライフゲームを実行して動きを目で見て壊れてなさそうだと判断していましたが、その作業を自動化しましょう。

テスト実行の流れはこんな感じでどうでしょうか:

(1) gol.vg.txt から出発し、パース・コード生成・アセンブルを実行して実行ファイルを作る
(2) VM で実行ファイルを実行する
(3) 適当なところで実行を止める
(4) VM の内部を見て、期待する状態になっているか検証する

(1) (2) はこれまでやってきた通りで、 run.sh が行っていることとほぼ同じです。（とりあえずは）テストコードから同じように実行してやればよいでしょう。

(3) 現状では gol.vg.txt は Ctrl-C で止めるまで動き続けるようになっています。ここは（可能ならテストのときだけ）適当な数の世代が経過したら終了するように変更します。

(4) 検証をどうするか少し考えましたが、 VRAM のメイン領域だけをダンプして、期待する状態と比較するようにします。

では修正していきます。

まずは適当な世代が経過したら終了するようにしてみます。

適当に動かして調べてみたところ、20世代分の処理が完了した時点でグライダーが1周して元の位置に戻ると分かりました。 *1 ひとまずそこで終わるようにしてみます。

--- a/gol.vg.txt
+++ b/gol.vg.txt
@@ -201,8 +201,11 @@ func main() {
   call vram_set(w, 1, 2, 1);
   call vram_set(w, 2, 2, 1);
 
-  while (0 == 0) {
+  var gen_limit = 21;
+  var gen = 1;
+  while (gen != gen_limit) {
     call make_next_gen(w, h);
     call replace_with_buf();
+    set gen = gen + 1;
   }
 }

./run.sh gol.vg.txt を実行すると、期待通りに元の位置に戻った状態で終了することが確認できました。よしよし。

今の状態でテストコードから vgvm.rb を require すると、 require した瞬間に実行が始まってしまいます。それでは困りますから、エントリポイントの部分を if $0 == __FILE__ ... end で囲みます。

--- a/vgvm.rb
+++ b/vgvm.rb
@@ -481,6 +481,7 @@ class Vm
   end
 end
 
+if $0 == __FILE__
 exe_file = ARGV[0]
 
 stack_size = 50
@@ -489,3 +490,4 @@ vm = Vm.new(mem, stack_size)
 vm.load_program(exe_file)
 
 vm.start
+end

※インデントは後でまとめて修正しました（以下も同様）。

テストを実行するときはダンプ表示や $stdin.gets によるユーザ入力待ちは不要です。環境変数 TEST の有無を見て、テストのときはダンプ表示などを行わないようにしました。

--- a/vgvm.rb
+++ b/vgvm.rb
@@ -139,6 +139,10 @@ class Vm
     @step = 0
   end
 
+  def test?
+    ENV.key?("TEST")
+  end
+
   def set_sp(addr)
     raise "Stack overflow" if addr < 0
     @sp = addr
@@ -149,9 +153,11 @@ class Vm
   end
 
   def start
+    unless test?
     dump_v2() # 初期状態
     puts "Press enter key to start"
     $stdin.gets
+    end
 
     loop do
       @step += 1
@@ -236,6 +242,7 @@ class Vm
         raise "Unknown operator (#{op})"
       end
 
+      unless test?
       if ENV.key?("STEP")
         dump_v2()
         $stdin.gets
@@ -245,6 +252,7 @@ class Vm
       end
 
       # sleep 0.01
+      end
     end
   end

ためしに時間を測ってみると、20世代の処理が約 0.75秒で終わりました。やはりダンプ表示をやめると速くなりますね。ちなみに第38回で10ステップごとに1回ダンプ表示を行うようにした段階では約20秒かかっていました。

終了時のステップ数を見てみると 423,283 でした。 1命令の実行あたりにかかっている時間は平均して約 1.8 マイクロ秒ということになります。

20世代の処理が終わって while ループを抜けると、 main 関数自体からも抜けることになります。その次に何が起こるかというと、exit 命令の実行です。

現状では exit 命令が来たときに Kernel#exit するようになっていますが、ここで Ruby のプログラム全体が終了してしまうと検証がやりにくいので、単に return して Vm#start から戻るだけにします。

--- a/vgvm.rb
+++ b/vgvm.rb
@@ -169,8 +169,7 @@ class Vm
 
       case op
       when "exit"
-        $stderr.puts "exit"
-        exit
+        return
       when "set_reg_a"
         val = @mem.main[@pc + 1]
         set_reg_a(val)
@@ -499,4 +498,5 @@ vm.load_program(exe_file)
 
 vm.start
 vm.dump_v2()
+$stderr.puts "exit"
 end

VRAM のメイン領域だけをダンプする dump_vram_main() を Vm と Memory に追加。 Memory#dump_vram をコピペして、VRAM 全体ではなくメイン領域だけをダンプするようにしました。

--- a/vgvm.rb
+++ b/vgvm.rb
@@ -115,6 +115,15 @@ class Memory
       format_cols(main[li]) + " " + format_cols(buf[li])
     }.join("\n")
   end
+
+  def dump_vram_main
+    rows = @vram.each_slice(5).to_a
+    main = rows[0..4]
+
+    (0..4).map {|li|
+      format_cols(main[li])
+    }.join("\n")
+  end
 end
 
 class Vm
@@ -324,6 +333,10 @@ class Vm
     EOB
   end
 
+  def dump_vram_main
+    @mem.dump_vram_main()
+  end
+
   def add_ab
     @reg_a = @reg_a + @reg_b
   end

これで準備ができました。テストコードを書きます。

20世代で1周するケースをテストしておけば、トーラスの継ぎ目の処理がうまく動かなくなった場合でも気づけますし、最低限のテストとしては悪くないんじゃないでしょうか。

# coding: utf-8
require "minitest/autorun"
require_relative "../vgvm"

# End-to-end test for the Game of Life program: builds gol.vg.txt through
# vgparser.rb, vgcg.rb and vgasm.rb, runs the result on the VM, and compares
# the main VRAM area with the expected pattern.
class GolTest < Minitest::Test
  PROJECT_DIR = File.expand_path("..", __dir__)
  TMP_DIR = File.join(PROJECT_DIR, "tmp")

  VG_FILE = File.join(PROJECT_DIR, "gol.vg.txt")
  VGT_FILE = File.join(TMP_DIR, "gol.vgt.json")
  ASM_FILE = File.join(TMP_DIR, "gol.vga.txt")
  EXE_FILE = File.join(TMP_DIR, "gol.vge.yaml")

  def setup
    # The VM checks only for the presence of TEST to skip dumps and prompts.
    ENV["TEST"] = ""

    # Intermediate files are written here; create it on a fresh checkout.
    Dir.mkdir(TMP_DIR) unless Dir.exist?(TMP_DIR)

    stack_size = 50
    mem = Memory.new(stack_size)
    @vm = Vm.new(mem, stack_size)
  end

  # After the generations run by gol.vg.txt the glider should be back at
  # its starting position.
  def test_20generations
    run_tool("vgparser.rb", VG_FILE,  VGT_FILE)
    run_tool("vgcg.rb",     VGT_FILE, ASM_FILE)
    run_tool("vgasm.rb",    ASM_FILE, EXE_FILE)

    @vm.load_program(EXE_FILE)
    @vm.start()

    assert_equal(
      [
        ".@...",
        "..@..",
        "@@@..",
        ".....",
        ".....",
      ].join("\n"),
      @vm.dump_vram_main()
    )
  end

  # Runs one build step (`ruby <script> <src>`) with stdout redirected to
  # +dest+ and fails the test right away if the step does not succeed,
  # instead of carrying on with a stale or empty output file. Arguments are
  # passed without a shell, so paths containing spaces are safe.
  def run_tool(script, src, dest)
    ok = system("ruby", File.join(PROJECT_DIR, script), src, out: dest)
    assert ok, "#{script} failed for #{src}"
  end
end

ruby test/gol_test.rb でテストが実行できます。

せっかくなので最初の1世代だけ実行した時点のテストケースも追加します。

最初の1世代が終わった時点ではこうなっていてほしい。

.....
@.@..
.@@..
.@...
.....

ちょっと強引ですが、gol.vg.txt のコードの var gen_limit = 21; の部分を文字列置換で直接書き換えるようにしました。

  # Builds a copy of gol.vg.txt that stops after the first generation and
  # checks the main VRAM area against the expected pattern.
  def test_first_generation
    # Rewrite the source so that it finishes after one generation.
    vg_file_replaced = File.join(TMP_DIR, "gol_replaced.vg.txt")
    src = File.read(VG_FILE)
    # Without this check a missing marker would make sub a silent no-op and
    # the unmodified program would be tested instead.
    assert_includes src, "var gen_limit = 21;"
    File.write(
      vg_file_replaced,
      src.sub("var gen_limit = 21;", "var gen_limit = 2;")
    )

    # Stop at the first failing build step rather than running stale output.
    assert(
      system(%Q{ ruby #{PROJECT_DIR}/vgparser.rb #{vg_file_replaced} > #{VGT_FILE} }),
      "vgparser.rb failed"
    )
    assert(
      system(%Q{ ruby #{PROJECT_DIR}/vgcg.rb     #{VGT_FILE} > #{ASM_FILE} }),
      "vgcg.rb failed"
    )
    assert(
      system(%Q{ ruby #{PROJECT_DIR}/vgasm.rb    #{ASM_FILE} > #{EXE_FILE} }),
      "vgasm.rb failed"
    )

    @vm.load_program(EXE_FILE)
    @vm.start()

    assert_equal(
      [
        ".....",
        "@.@..",
        ".@@..",
        ".@...",
        ".....",
      ].join("\n"),
      @vm.dump_vram_main()
    )
  end

これでまた安心感が増しました。

テスト実行でない普通の実行の場合は、やはり今までと同じようにずっと動き続けてほしいので、gen_limit = 0 としておきました。併せてテストコードの方も修正します。

--- a/gol.vg.txt
+++ b/gol.vg.txt
@@ -201,7 +201,7 @@ func main() {
   call vram_set(w, 1, 2, 1);
   call vram_set(w, 2, 2, 1);
 
-  var gen_limit = 21;
+  var gen_limit = 0;
   var gen = 1;
   while (gen != gen_limit) {
     call make_next_gen(w, h);

--- a/test/gol_test.rb
+++ b/test/gol_test.rb
@@ -20,7 +20,14 @@ class GolTest < Minitest::Test
   end
 
   def test_20generations
-    system %Q{ ruby #{PROJECT_DIR}/vgparser.rb #{VG_FILE}  > #{VGT_FILE} }
+    # 1世代で終了するように書き換える
+    vg_file_replaced = File.join(TMP_DIR, "gol_replaced.vg.txt")
+    src = File.read(VG_FILE)
+    open(vg_file_replaced, "w") {|f|
+      f.print src.sub("var gen_limit = 0;", "var gen_limit = 21;")
+    }
+
+    system %Q{ ruby #{PROJECT_DIR}/vgparser.rb #{vg_file_replaced}  > #{VGT_FILE} }
     system %Q{ ruby #{PROJECT_DIR}/vgcg.rb     #{VGT_FILE} > #{ASM_FILE} }
     system %Q{ ruby #{PROJECT_DIR}/vgasm.rb    #{ASM_FILE} > #{EXE_FILE} }
 
@@ -44,7 +51,7 @@ class GolTest < Minitest::Test
     vg_file_replaced = File.join(TMP_DIR, "gol_replaced.vg.txt")
     src = File.read(VG_FILE)
     open(vg_file_replaced, "w") {|f|
-      f.print src.sub("var gen_limit = 21;", "var gen_limit = 2;")
+      f.print src.sub("var gen_limit = 0;", "var gen_limit = 2;")
     }
 
     system %Q{ ruby #{PROJECT_DIR}/vgparser.rb #{vg_file_replaced} > #{VGT_FILE} }

あとは、 gen_limit の書き換え処理やコンパイル部分の処理が重複していたので適当にメソッド抽出しておきました（diff は省略）。

あー、あと、よく考えたら dump_vram_main() はテストのときしか使わないので、 vgvm.rb に置いておく必要はないですね。テストコードの方に移動させました（diff は省略）。

というわけで、めでたくライフゲームのふるまいを自動でテストできるようになりました 🎉

これでこの先安心してリファクタリングを進めることができます。

目次ページに戻る / 前 / 次

*1:この確認のため、以前適当に書いた gol.rb を修正して gol.vg.txt に近い形にしました。

回路図での監視箇所の指定

名前の受け渡し

状態の記録

描画

備考

簡易版

改行を変化させないようにしたもの

バージョン

参考

この記事を読んだ人はこちらも（たぶん）読んでいます

簡易版

改行を変化させない＋エンコーディング指定版

バージョン

この記事を読んだ人は（ひょっとしたら）こちらも読んでいます

関連

まとめ

バージョンなど

一応公式の説明

調査1: doAsの設定による違い

準備

接続

create database

doAs=true の場合

doAs=false の場合

create table + insert

doAs=true の場合

doAs=false の場合

create external table + insert

doAs=true の場合

doAs=false の場合

調査2: OSのユーザとproxy userの関係

OSユーザあり、proxy user 設定なし

OSユーザなし、proxy user 設定あり

OSユーザあり、proxy user 設定あり

OSユーザあり、proxy user 設定なし / グループのみ変更