Athena使ってみたコマンド残し
Athena
テーブル作成
CREATE EXTERNAL TABLE IF NOT EXISTS elblogdb.dev1_web ( request_timestamp string, elb_name string, request_ip string, request_port int, backend_ip string, backend_port int, request_processing_time double, backend_processing_time double, client_response_time double, elb_response_code string, backend_response_code string, received_bytes bigint, sent_bytes bigint, request_verb string, url string, protocol string, user_agent string, ssl_cipher string, ssl_protocol string ) PARTITIONED BY(year string, month string, day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe' WITH SERDEPROPERTIES ( 'serialization.format' = '1','input.regex' = '([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*):([0-9]*) ([.0-9]*) ([.0-9]*) ([.0-9]*) (-|[0-9]*) (-|[0-9]*) ([-0-9]*) ([-0-9]*) \\\"([^ ]*) ([^ ]*) (- |[^ ]*)\\\" (\"[^\"]*\") ([A-Z0-9-]+) ([A-Za-z0-9.-]*)$' ) LOCATION 's3://XXXX-dev-elblogs/dev1/web/AWSLogs/XXXXXXX/elasticloadbalancing/ap-northeast-1/';
パーティション追加
ALTER TABLE elblogdb.dev1_web ADD PARTITION (year='2018',month='05',day='14') location 's3://XXXX-dev-elblogs/dev1/web/AWSLogs/XXXXXX/elasticloadbalancing/ap-northeast-1/2018/05/14/'
パーティション確認
show partitions elblogdb.dev1_web;
パーティションロード
MSCK REPAIR TABLE elblogdb.dev1_web
検索
SELECT "dev1_web"."backend_ip" AS "backend_ip", "dev1_web"."backend_port" AS "backend_port", "dev1_web"."backend_processing_time" AS "backend_processing", "dev1_web"."backend_response_code" AS "backend_response_c", "dev1_web"."client_response_time" AS "client_response_ti", "dev1_web"."day" AS "day", "dev1_web"."elb_name" AS "elb_name", "dev1_web"."elb_response_code" AS "elb_response_code", "dev1_web"."month" AS "month", "dev1_web"."protocol" AS "protocol", "dev1_web"."received_bytes" AS "received_bytes", "dev1_web"."request_ip" AS "request_ip", "dev1_web"."request_port" AS "request_port", "dev1_web"."request_processing_time" AS "request_processing", "dev1_web"."request_timestamp" AS "request_timestamp", "dev1_web"."request_verb" AS "request_verb", "dev1_web"."sent_bytes" AS "sent_bytes", "dev1_web"."ssl_cipher" AS "ssl_cipher", "dev1_web"."ssl_protocol" AS "ssl_protocol", "dev1_web"."url" AS "url", "dev1_web"."user_agent" AS "user_agent", "dev1_web"."year" AS "year" FROM "elblogdb"."dev1_web" "dev1_web" WHERE REGEXP_LIKE(LOWER("dev1_web"."url"), CONCAT('.*', '7241996369447818596', '.*')) LIMIT 20
作業ログ
import boto3 import time client = boto3.client('athena') def lambda_handler(event, context): for record in event["Records"]: bucket = record['s3']['bucket']['name'] table_name = "elblogdb.dev1_web" key = record['s3']['object']['key'] key_tmp = key.split('/') year = key_tmp[len(key_tmp)-4] month = key_tmp[len(key_tmp)-3] day = key_tmp[len(key_tmp)-2] path = 'dev1/web/AWSLogs/XXXXXX/elasticloadbalancing/ap-northeast-1' + '/' + year + '/' + month + '/' + day + '/' sql = "ALTER TABLE {} ADD PARTITION (year=\'{}\',month=\'{}\',day=\'{}\') location \'{}\'".format \ (table_name, year, month , day,'s3://' + bucket + '/' + path) print('sql=' + sql) client.start_query_execution( QueryString=sql, QueryExecutionContext={ 'Database': 'elblogdb' }, ResultConfiguration={ 'OutputLocation': 's3://aws-athena-query-results-XXXXXX-ap-northeast-1' } )