release: prepare for 0.14.1

dist/redhat: Increase scylla-server service start timeout to 15 min
Fixes #749

Signed-off-by: Shlomi Livne <shlomi@scylladb.com>
2016-01-05 15:30:47 +02:00 · 2016-01-05 15:30:41 +02:00 · 2016-01-04 15:21:24 +02:00 · 2016-01-04 14:57:57 +02:00 · 2016-01-04 14:57:57 +02:00 · 2016-01-04 14:57:33 +02:00
328 changed files with 13138 additions and 5283 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@
 build
 build.ninja
 cscope.*
+/debian/
--- a/76
+++ b/76
@@ -1 +1,77 @@
 http://git-wip-us.apache.org/repos/asf/cassandra.git trunk (bf599fb5b062cbcc652da78b7d699e7a01b949ad)
+
+import = bf599fb5b062cbcc652da78b7d699e7a01b949ad
+Y      = Already in scylla
+
+$ git log --oneline import..cassandra-2.1.11 -- gms/
+Y  484e645 Mark node as dead even if already left
+   d0c166f Add trampled commit back
+   ba5837e Merge branch 'cassandra-2.0' into cassandra-2.1
+   718e47f Forgot a damn c/r
+   a7282e4 Merge branch 'cassandra-2.0' into cassandra-2.1
+Y  ae4cd69 Print versions for gossip states in gossipinfo.
+Y  7fba3d2 Don't mark nodes down before the max local pause interval once paused.
+   c2142e6 Merge branch 'cassandra-2.0' into cassandra-2.1
+   ba9a69e checkForEndpointCollision fails for legitimate collisions, finalized list of statuses and nits, CASSANDRA-9765
+   54470a2 checkForEndpointCollision fails for legitimate collisions, improved version after CR, CASSANDRA-9765
+   2c9b490 checkForEndpointCollision fails for legitimate collisions, CASSANDRA-9765
+   4c15970 Merge branch 'cassandra-2.0' into cassandra-2.1
+   ad8047a ArrivalWindow should use primitives
+Y  4012134 Failure detector detects and ignores local pauses
+   9bcdd0f Merge branch 'cassandra-2.0' into cassandra-2.1
+   cefaa4e Close incoming connections when MessagingService is stopped
+   ea1beda Merge branch 'cassandra-2.0' into cassandra-2.1
+   08dbbd6 Ignore gossip SYNs after shutdown
+   3c17ac6 Merge branch 'cassandra-2.0' into cassandra-2.1
+   a64bc43 lists work better when you initialize them
+   543a899 change list to arraylist
+   730d4d4 Merge branch 'cassandra-2.0' into cassandra-2.1
+   e3e2de0 change list to arraylist
+   f7884c5 Merge branch 'cassandra-2.0' into cassandra-2.1
+Y  84b2846 remove redundant state
+   4f2c372 Merge branch 'cassandra-2.0' into cassandra-2.1
+Y  b2c62bb Add shutdown gossip state to prevent timeouts during rolling restarts
+Y  def4835 Add missing follow on fix for 7816 only applied to cassandra-2.1 branch in 763130bdbde2f4cec2e8973bcd5203caf51cc89f
+Y  763130b Followup commit for 7816
+   1376b8e Merge branch 'cassandra-2.0' into cassandra-2.1
+Y  2199a87 Fix duplicate up/down messages sent to native clients
+   136042e Merge branch 'cassandra-2.0' into cassandra-2.1
+Y  eb9c5bb Improve FD logging when the arrival time is ignored.
+
+$ git log --oneline import..cassandra-2.1.11 -- service/StorageService.java
+   92c5787 Keep StorageServiceMBean interface stable
+   6039d0e Fix DC and Rack in nodetool info
+   a2f0da0 Merge branch 'cassandra-2.0' into cassandra-2.1
+   c4de752 Follow-up to CASSANDRA-10238
+   e889ee4 2i key cache load fails
+   4b1d59e Merge branch 'cassandra-2.0' into cassandra-2.1
+   257cdaa Fix consolidating racks violating the RF contract
+Y  27754c0 refuse to decomission if not in state NORMAL patch by Jan Karlsson and Stefania for CASSANDRA-8741
+Y  5bc56c3 refuse to decomission if not in state NORMAL patch by Jan Karlsson and Stefania for CASSANDRA-8741
+Y  8f9ca07 Cannot replace token does not exist - DN node removed as Fat Client
+   c2142e6 Merge branch 'cassandra-2.0' into cassandra-2.1
+   54470a2 checkForEndpointCollision fails for legitimate collisions, improved version after CR, CASSANDRA-9765
+   1eccced Handle corrupt files on startup
+   2c9b490 checkForEndpointCollision fails for legitimate collisions, CASSANDRA-9765
+   c4b5260 Merge branch 'cassandra-2.0' into cassandra-2.1
+Y  52dbc3f Can't transition from write survey to normal mode
+   9966419 Make rebuild only run one at a time
+   d693ca1 Merge branch 'cassandra-2.0' into cassandra-2.1
+   be9eff5 Add option to not validate atoms during scrub
+   2a4daaf followup fix for 8564
+   93478ab Wait for anticompaction to finish
+   9e9846e Fix for harmless exceptions being logged as ERROR
+   6d06f32 Fix anticompaction blocking ANTI_ENTROPY stage
+   4f2c372 Merge branch 'cassandra-2.0' into cassandra-2.1
+Y  b2c62bb Add shutdown gossip state to prevent timeouts during rolling restarts
+Y  cba1b68 Fix failed bootstrap/replace attempts being persisted in system.peers
+   f59df28 Allow takeColumnFamilySnapshot to take a list of tables patch by Sachin Jarin; reviewed by Nick Bailey for CASSANDRA-8348
+Y  ac46747 Fix failed bootstrap/replace attempts being persisted in system.peers
+   5abab57 Merge branch 'cassandra-2.0' into cassandra-2.1
+   0ff9c3c Allow reusing snapshot tags across different column families.
+   f9c57a5 Merge branch 'cassandra-2.0' into cassandra-2.1
+Y  b296c55 Fix MOVED_NODE client event
+   bbb3fc7 Merge branch 'cassandra-2.0' into cassandra-2.1
+   37eb2a0 Fix NPE in nodetool getendpoints with bad ks/cf
+   f8b43d4 Merge branch 'cassandra-2.0' into cassandra-2.1
+   e20810c Remove C* specific class from JMX API
--- a/README.md
+++ b/README.md
@@ -11,13 +11,37 @@ git submodule init
 git submodule update --recursive
 ```

-### Building scylla on Fedora
-Installing required packages:
+### Building and Running Scylla on Fedora
+* Installing required packages:

 ```
 sudo yum install yaml-cpp-devel lz4-devel zlib-devel snappy-devel jsoncpp-devel thrift-devel antlr3-tool antlr3-C++-devel libasan libubsan
 ```

+* Build Scylla
+```
+./configure.py --mode=release --with=scylla --disable-xen
+ninja build/release/scylla -j2 # you can use more cpus if you have tons of RAM
+
+```
+
+* Run Scylla
+```
+./build/release/scylla
+
+```
+
+* Run Scylla with one CPU and ./tmp as the data directory
+
+```
+./build/release/scylla --datadir tmp --commitlog-directory tmp --smp 1
+```
+
+* For more run options:
+```
+./build/release/scylla --help
+```
+
 ## Building Fedora RPM

 As a pre-requisite, you need to install [Mock](https://fedoraproject.org/wiki/Mock) on your machine:
@@ -56,5 +80,17 @@ docker build -t <image-name> .
 Run the image with:

 ```
-docker run -i -t <image name>
+docker run -p $(hostname -i):9042:9042 -i -t <image name>
 ```
+
+
+## Contributing to Scylla
+
+Do not send pull requests.
+
+Send patches to the mailing list address scylladb-dev@googlegroups.com.
+Be sure to subscribe.
+
+In order for your patches to be merged, you must sign the Contributor's
+License Agreement, protecting your rights and ours.  See
+http://www.scylladb.com/opensource/cla/.
--- a/2
+++ b/2
@@ -1,6 +1,6 @@
 #!/bin/sh

-VERSION=development
+VERSION=0.14.1

 if test -f version
 then
--- a/api/api-doc/column_family.json
+++ b/api/api-doc/column_family.json
@@ -579,30 +579,6 @@
            }
         ]
      },
-      {
-         "path":"/column_family/sstables/snapshots_size/{name}",
-         "operations":[
-            {
-               "method":"GET",
-               "summary":"the size of SSTables in 'snapshots' subdirectory which aren't live anymore",
-               "type":"double",
-               "nickname":"true_snapshots_size",
-               "produces":[
-                  "application/json"
-               ],
-               "parameters":[
-                  {
-                     "name":"name",
-                     "description":"The column family name in keysspace:name format",
-                     "required":true,
-                     "allowMultiple":false,
-                     "type":"string",
-                     "paramType":"path"
-                  }
-               ]
-            }
-         ]
-      },
      {
         "path":"/column_family/metrics/memtable_columns_count/{name}",
         "operations":[
@@ -2041,7 +2017,7 @@
         ]
      },
      {
-         "path":"/column_family/metrics/true_snapshots_size/{name}",
+         "path":"/column_family/metrics/snapshots_size/{name}",
         "operations":[
            {
               "method":"GET",
--- a/api/api-doc/compaction_manager.json
+++ b/api/api-doc/compaction_manager.json
@@ -15,7 +15,7 @@
               "summary":"get List of running compactions",
               "type":"array",
               "items":{
-                  "type":"jsonmap"
+                  "type":"summary"
               },
               "nickname":"get_compactions",
               "produces":[
@@ -46,16 +46,16 @@
         ]
      },
      {
-         "path":"/compaction_manager/compaction_summary",
+         "path":"/compaction_manager/compaction_info",
         "operations":[
            {
               "method":"GET",
-               "summary":"get compaction summary",
+               "summary":"get a list of all active compaction info",
               "type":"array",
               "items":{
-                  "type":"string"
+                  "type":"compaction_info"
               },
-               "nickname":"get_compaction_summary",
+               "nickname":"get_compaction_info",
               "produces":[
                  "application/json"
               ],
@@ -174,30 +174,73 @@
    }
   ],
   "models":{
-      "mapper":{
-         "id":"mapper",
-         "description":"A key value mapping",
+      "row_merged":{
+         "id":"row_merged",
+         "description":"A row merged information",
         "properties":{
            "key":{
-               "type":"string",
-               "description":"The key"
+               "type":"int",
+               "description":"The number of sstables"
            },
            "value":{
-               "type":"string",
-               "description":"The value"
+               "type":"long",
+               "description":"The number of rows compacted"
            }
         }
      },
-      "jsonmap":{
-         "id":"jsonmap",
-         "description":"A json representation of a map as a list of key value",
+      "compaction_info" :{
+          "id": "compaction_info",
+          "description":"Information about an active compaction",
+          "properties":{
+            "operation_type":{
+               "type":"string",
+               "description":"The operation type"
+            },
+            "completed":{
+               "type":"long",
+               "description":"The current completed"
+            },
+            "total":{
+               "type":"long",
+               "description":"The total to compact"
+            },
+            "unit":{
+               "type":"string",
+               "description":"The compacted unit"
+            }
+          }
+      },
+      "summary":{
+         "id":"summary",
+         "description":"A compaction summary object",
         "properties":{
-            "value":{
-               "type":"array",
-               "items":{
-                  "type":"mapper"
-               },
-               "description":"A list of key, value mapping"
+            "id":{
+               "type":"string",
+               "description":"The UUID"
+            },
+            "ks":{
+               "type":"string",
+               "description":"The keyspace name"
+            },
+            "cf":{
+               "type":"string",
+               "description":"The column family name"
+            },
+            "completed":{
+               "type":"long",
+               "description":"The number of units completed"
+            },
+            "total":{
+               "type":"long",
+               "description":"The total number of units"
+            },
+            "task_type":{
+               "type":"string",
+               "description":"The task compaction type"
+            },
+            "unit":{
+               "type":"string",
+               "description":"The units being used"
            }
         }
      },
@@ -232,7 +275,7 @@
            "rows_merged":{
               "type":"array",
               "items":{
-                  "type":"mapper"
+                  "type":"row_merged"
               },
               "description":"The merged rows"
            }
--- a/api/api-doc/failure_detector.json
+++ b/api/api-doc/failure_detector.json
@@ -48,7 +48,10 @@
            {
               "method":"GET",
               "summary":"Get all endpoint states",
-               "type":"string",
+               "type":"array",
+               "items":{
+                  "type":"endpoint_state"
+               },
               "nickname":"get_all_endpoint_states",
               "produces":[
                  "application/json"
@@ -148,6 +151,53 @@
                    "description": "The value"
                }
            }
+        },
+        "endpoint_state": {
+           "id": "states",
+           "description": "Holds an endpoint state",
+               "properties": {
+                "addrs": {
+                    "type": "string",
+                    "description": "The endpoint address"
+                },
+                "generation": {
+                    "type": "int",
+                    "description": "The heart beat generation"
+                },
+                "version": {
+                    "type": "int",
+                    "description": "The heart beat version"
+                },
+                "update_time": {
+                    "type": "long",
+                    "description": "The update timestamp"
+                },
+                "is_alive": {
+                    "type": "boolean",
+                    "description": "Is the endpoint alive"
+                },
+                "application_state" : {
+                    "type":"array",
+                    "items":{
+                        "type":"version_value"
+                    },
+                    "description": "The version values of the endpoint's application states"
+                }
+            }
+        },
+        "version_value": {
+           "id": "version_value",
+           "description": "Holds a version value for an application state",
+               "properties": {
+                "application_state": {
+                    "type": "int",
+                    "description": "The application state enum index"
+                },
+                "value": {
+                    "type": "string",
+                    "description": "The version value"
+                }
+            }
        }
    }
 }
--- a/api/api-doc/messaging_service.json
+++ b/api/api-doc/messaging_service.json
@@ -8,13 +8,16 @@
   ],
   "apis":[
      {
-         "path":"/messaging_service/totaltimeouts",
+         "path":"/messaging_service/messages/timeout",
         "operations":[
            {
               "method":"GET",
-               "summary":"Total number of timeouts happened on this node",
-               "type":"long",
-               "nickname":"get_totaltimeouts",
+               "summary":"Get the number of timeout messages",
+               "type":"array",
+               "items":{
+                  "type":"message_counter"
+               },
+               "nickname":"get_timeout_messages",
               "produces":[
                  "application/json"
               ],
@@ -25,7 +28,7 @@
         ]
      },
      {
-         "path":"/messaging_service/messages/dropped",
+         "path":"/messaging_service/messages/dropped_by_ver",
         "operations":[
            {
               "method":"GET",
@@ -34,6 +37,25 @@
               "items":{
                  "type":"verb_counter"
               },
+               "nickname":"get_dropped_messages_by_ver",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+               ]
+            }
+         ]
+      },
+      {
+         "path":"/messaging_service/messages/dropped",
+         "operations":[
+            {
+               "method":"GET",
+               "summary":"Get the number of messages that were dropped before sending",
+               "type":"array",
+               "items":{
+                  "type":"message_counter"
+               },
               "nickname":"get_dropped_messages",
               "produces":[
                  "application/json"
@@ -143,6 +165,49 @@
               ]
            }
         ]
+      },
+      {
+         "path":"/messaging_service/messages/respond_completed",
+         "operations":[
+            {
+               "method":"GET",
+               "summary":"Get the number of completed respond messages",
+               "type":"array",
+               "items":{
+                  "type":"message_counter"
+               },
+               "nickname":"get_respond_completed_messages",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+               ]
+            }
+         ]
+      },
+      {
+         "path":"/messaging_service/version",
+         "operations":[
+            {
+               "method":"GET",
+               "summary":"Get the version number",
+               "type":"int",
+               "nickname":"get_version",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+                  {
+                     "name":"addr",
+                     "description":"Address",
+                     "required":true,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  }
+               ]
+            }
+         ]
      }
   ],
   "models":{
@@ -150,10 +215,10 @@
         "id":"message_counter",
         "description":"Holds command counters",
         "properties":{
-            "count":{
+            "value":{
               "type":"long"
            },
-            "ip":{
+            "key":{
               "type":"string"
            }
         }
--- a/api/api-doc/storage_service.json
+++ b/api/api-doc/storage_service.json
@@ -290,6 +290,25 @@
            }
         ]
      },
+      {
+         "path":"/storage_service/describe_ring/",
+         "operations":[
+            {
+               "method":"GET",
+               "summary":"The TokenRange for any keyspace",
+               "type":"array",
+               "items":{
+                  "type":"token_range"
+               },
+               "nickname":"describe_any_ring",
+               "produces":[
+                  "application/json"
+               ],
+               "parameters":[
+               ]
+            }
+         ]
+      },
      {
         "path":"/storage_service/describe_ring/{keyspace}",
         "operations":[
@@ -298,9 +317,9 @@
               "summary":"The TokenRange for a given keyspace",
               "type":"array",
               "items":{
-                  "type":"string"
+                  "type":"token_range"
               },
-               "nickname":"describe_ring_jmx",
+               "nickname":"describe_ring",
               "produces":[
                  "application/json"
               ],
@@ -311,7 +330,7 @@
                     "required":true,
                     "allowMultiple":false,
                     "type":"string",
-                     "paramType":"query"
+                     "paramType":"path"
                  }
               ]
            }
@@ -406,7 +425,7 @@
               "summary":"load value. Keys are IP addresses",
               "type":"array",
               "items":{
-                  "type":"mapper"
+                  "type":"double_mapper"
               },
               "nickname":"get_load_map",
               "produces":[
@@ -778,8 +797,72 @@
                     "paramType":"path"
                  },
                  {
-                     "name":"options",
-                     "description":"Options for the repair",
+                     "name":"primaryRange",
+                     "description":"If the value is the string 'true' with any capitalization, repair only the first range returned by the partitioner.",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"parallelism",
+                     "description":"Repair parallelism, can be 0 (sequential), 1 (parallel) or 2 (datacenter-aware).",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"incremental",
+                     "description":"If the value is the string 'true' with any capitalization, perform incremental repair.",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"jobThreads",
+                     "description":"An integer specifying the parallelism on each node.",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"ranges",
+                     "description":"An explicit list of ranges to repair, overriding the default choice. Each range is expressed as token1:token2, and multiple ranges can be given as a comma separated list.",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"columnFamilies",
+                     "description":"Which column families to repair in the given keyspace. Multiple column families can be named, separated by commas. If this option is missing, all column families in the keyspace are repaired.",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"dataCenters",
+                     "description":"Which data centers are to participate in this repair. Multiple data centers can be listed separated by commas.",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"hosts",
+                     "description":"Which hosts are to participate in this repair. Multiple hosts can be listed separated by commas.",
+                     "required":false,
+                     "allowMultiple":false,
+                     "type":"string",
+                     "paramType":"query"
+                  },
+                  {
+                     "name":"trace",
+                     "description":"If the value is the string 'true' with any capitalization, enable tracing of the repair.",
                     "required":false,
                     "allowMultiple":false,
                     "type":"string",
@@ -1945,6 +2028,20 @@
            }
         }
      },
+      "double_mapper":{
+         "id":"double_mapper",
+         "description":"A key value mapping between a string and a double",
+         "properties":{
+            "key":{
+               "type":"string",
+               "description":"The key"
+            },
+            "value":{
+               "type":"double",
+               "description":"The value"
+            }
+         }
+      },
      "maplist_mapper":{
         "id":"maplist_mapper",
         "description":"A key value mapping, where key and value are list",
@@ -2003,6 +2100,59 @@
               "description":"The column family"
            }
         }
+      },
+      "endpoint_detail":{
+         "id":"endpoint_detail",
+         "description":"Endpoint detail",
+         "properties":{
+            "host":{
+               "type":"string",
+               "description":"The endpoint host"
+            },
+            "datacenter":{
+               "type":"string",
+               "description":"The endpoint datacenter"
+            },
+            "rack":{
+               "type":"string",
+               "description":"The endpoint rack"
+            }
+         }
+      },
+      "token_range":{
+         "id":"token_range",
+         "description":"Endpoint range information",
+         "properties":{
+            "start_token":{
+               "type":"string",
+               "description":"The range start token"
+            },
+            "end_token":{
+               "type":"string",
+               "description":"The range end token"
+            },
+            "endpoints":{
+               "type":"array",
+               "items":{
+                  "type":"string"
+               },
+               "description":"The endpoints"
+            },
+            "rpc_endpoints":{
+               "type":"array",
+               "items":{
+                  "type":"string"
+               },
+               "description":"The rpc endpoints"
+            },
+            "endpoint_details":{
+               "type":"array",
+               "items":{
+                  "type":"endpoint_detail"
+               },
+               "description":"The endpoint details"
+            }
+         }
      }
   }
 }
--- a/api/api.hh
+++ b/api/api.hh
@@ -128,47 +128,54 @@ inline double pow2(double a) {
    return a * a;
 }

-inline httpd::utils_json::histogram add_histogram(httpd::utils_json::histogram res,
+// FIXME: Move to utils::ihistogram::operator+=()
+inline utils::ihistogram add_histogram(utils::ihistogram res,
        const utils::ihistogram& val) {
-    if (!res.count._set) {
-        res = val;
-        return res;
+    if (res.count == 0) {
+        return val;
    }
    if (val.count == 0) {
-        return res;
+        return std::move(res);
    }
-    if (res.min() > val.min) {
+    if (res.min > val.min) {
        res.min = val.min;
    }
-    if (res.max() < val.max) {
+    if (res.max < val.max) {
        res.max = val.max;
    }
-    double ncount = res.count() + val.count;
+    double ncount = res.count + val.count;
    // To get an estimated sum we take the estimated mean
    // and multiply it by the true count
-    res.sum = res.sum() + val.mean * val.count;
-    double a = res.count()/ncount;
+    res.sum = res.sum + val.mean * val.count;
+    double a = res.count/ncount;
    double b = val.count/ncount;

-    double mean =  a * res.mean() + b * val.mean;
+    double mean =  a * res.mean + b * val.mean;

-    res.variance = (res.variance() + pow2(res.mean() - mean) )* a +
+    res.variance = (res.variance + pow2(res.mean - mean) )* a +
            (val.variance + pow2(val.mean -mean))* b;

    res.mean = mean;
-    res.count = res.count() + val.count;
+    res.count = res.count + val.count;
    for (auto i : val.sample) {
-        res.sample.push(i);
+        res.sample.push_back(i);
    }
    return res;
 }

+inline
+httpd::utils_json::histogram to_json(const utils::ihistogram& val) {
+    httpd::utils_json::histogram h;
+    h = val;
+    return h;
+}
+
 template<class T, class F>
 future<json::json_return_type>  sum_histogram_stats(distributed<T>& d, utils::ihistogram F::*f) {

-    return d.map_reduce0([f](const T& p) {return p.get_stats().*f;}, httpd::utils_json::histogram(),
-            add_histogram).then([](const httpd::utils_json::histogram& val) {
-        return make_ready_future<json::json_return_type>(val);
+    return d.map_reduce0([f](const T& p) {return p.get_stats().*f;}, utils::ihistogram(),
+            add_histogram).then([](const utils::ihistogram& val) {
+        return make_ready_future<json::json_return_type>(to_json(val));
    });
 }

--- a/api/column_family.cc
+++ b/api/column_family.cc
@@ -64,21 +64,21 @@ future<> foreach_column_family(http_context& ctx, const sstring& name, function<

 future<json::json_return_type>  get_cf_stats(http_context& ctx, const sstring& name,
        int64_t column_family::stats::*f) {
-    return map_reduce_cf(ctx, name, 0, [f](const column_family& cf) {
+    return map_reduce_cf(ctx, name, int64_t(0), [f](const column_family& cf) {
        return cf.get_stats().*f;
    }, std::plus<int64_t>());
 }

 future<json::json_return_type>  get_cf_stats(http_context& ctx,
        int64_t column_family::stats::*f) {
-    return map_reduce_cf(ctx, 0, [f](const column_family& cf) {
+    return map_reduce_cf(ctx, int64_t(0), [f](const column_family& cf) {
        return cf.get_stats().*f;
    }, std::plus<int64_t>());
 }

 static future<json::json_return_type>  get_cf_stats_count(http_context& ctx, const sstring& name,
        utils::ihistogram column_family::stats::*f) {
-    return map_reduce_cf(ctx, name, 0, [f](const column_family& cf) {
+    return map_reduce_cf(ctx, name, int64_t(0), [f](const column_family& cf) {
        return (cf.get_stats().*f).count;
    }, std::plus<int64_t>());
 }
@@ -101,7 +101,7 @@ static future<json::json_return_type>  get_cf_stats_sum(http_context& ctx, const

 static future<json::json_return_type>  get_cf_stats_count(http_context& ctx,
        utils::ihistogram column_family::stats::*f) {
-    return map_reduce_cf(ctx, 0, [f](const column_family& cf) {
+    return map_reduce_cf(ctx, int64_t(0), [f](const column_family& cf) {
        return (cf.get_stats().*f).count;
    }, std::plus<int64_t>());
 }
@@ -110,28 +110,30 @@ static future<json::json_return_type>  get_cf_histogram(http_context& ctx, const
        utils::ihistogram column_family::stats::*f) {
    utils::UUID uuid = get_uuid(name, ctx.db.local());
    return ctx.db.map_reduce0([f, uuid](const database& p) {return p.find_column_family(uuid).get_stats().*f;},
-            httpd::utils_json::histogram(),
+            utils::ihistogram(),
            add_histogram)
-            .then([](const httpd::utils_json::histogram& val) {
-                return make_ready_future<json::json_return_type>(val);
+            .then([](const utils::ihistogram& val) {
+                return make_ready_future<json::json_return_type>(to_json(val));
    });
 }

 static future<json::json_return_type> get_cf_histogram(http_context& ctx, utils::ihistogram column_family::stats::*f) {
-    std::function<httpd::utils_json::histogram(const database&)> fun = [f] (const database& db)  {
-        httpd::utils_json::histogram res;
+    std::function<utils::ihistogram(const database&)> fun = [f] (const database& db)  {
+        utils::ihistogram res;
        for (auto i : db.get_column_families()) {
            res = add_histogram(res, i.second->get_stats().*f);
        }
        return res;
    };
-    return ctx.db.map(fun).then([](const std::vector<httpd::utils_json::histogram> &res) {
-        return make_ready_future<json::json_return_type>(res);
+    return ctx.db.map(fun).then([](const std::vector<utils::ihistogram> &res) {
+        std::vector<httpd::utils_json::histogram> r;
+        boost::copy(res | boost::adaptors::transformed(to_json), std::back_inserter(r));
+        return make_ready_future<json::json_return_type>(r);
    });
 }

 static future<json::json_return_type> get_cf_unleveled_sstables(http_context& ctx, const sstring& name) {
-    return map_reduce_cf(ctx, name, 0, [](const column_family& cf) {
+    return map_reduce_cf(ctx, name, int64_t(0), [](const column_family& cf) {
        return cf.get_unleveled_sstables();
    }, std::plus<int64_t>());
 }
@@ -221,25 +223,25 @@ void set_column_family(http_context& ctx, routes& r) {
    });

    cf::get_memtable_off_heap_size.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], 0, [](column_family& cf) {
+        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](column_family& cf) {
            return cf.active_memtable().region().occupancy().total_space();
        }, std::plus<int64_t>());
    });

    cf::get_all_memtable_off_heap_size.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, 0, [](column_family& cf) {
+        return map_reduce_cf(ctx, int64_t(0), [](column_family& cf) {
            return cf.active_memtable().region().occupancy().total_space();
        }, std::plus<int64_t>());
    });

    cf::get_memtable_live_data_size.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], 0, [](column_family& cf) {
+        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](column_family& cf) {
            return cf.active_memtable().region().occupancy().used_space();
        }, std::plus<int64_t>());
    });

    cf::get_all_memtable_live_data_size.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, 0, [](column_family& cf) {
+        return map_reduce_cf(ctx, int64_t(0), [](column_family& cf) {
            return cf.active_memtable().region().occupancy().used_space();
        }, std::plus<int64_t>());
    });
@@ -254,7 +256,7 @@ void set_column_family(http_context& ctx, routes& r) {

    cf::get_cf_all_memtables_off_heap_size.set(r, [&ctx] (std::unique_ptr<request> req) {
        warn(unimplemented::cause::INDEXES);
-        return map_reduce_cf(ctx, req->param["name"], 0, [](column_family& cf) {
+        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](column_family& cf) {
            return cf.occupancy().total_space();
        }, std::plus<int64_t>());
    });
@@ -263,21 +265,21 @@ void set_column_family(http_context& ctx, routes& r) {
        warn(unimplemented::cause::INDEXES);
        return ctx.db.map_reduce0([](const database& db){
            return db.dirty_memory_region_group().memory_used();
-        }, 0, std::plus<int64_t>()).then([](int res) {
+        }, int64_t(0), std::plus<int64_t>()).then([](int64_t res) { // receive the sum as int64_t; 'int' would truncate totals above 2 GiB
            return make_ready_future<json::json_return_type>(res);
        });
    });

    cf::get_cf_all_memtables_live_data_size.set(r, [&ctx] (std::unique_ptr<request> req) {
        warn(unimplemented::cause::INDEXES);
-        return map_reduce_cf(ctx, req->param["name"], 0, [](column_family& cf) {
+        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](column_family& cf) {
            return cf.occupancy().used_space();
        }, std::plus<int64_t>());
    });

    cf::get_all_cf_all_memtables_live_data_size.set(r, [&ctx] (std::unique_ptr<request> req) {
        warn(unimplemented::cause::INDEXES);
-        return map_reduce_cf(ctx, 0, [](column_family& cf) {
+        return map_reduce_cf(ctx, int64_t(0), [](column_family& cf) {
            return cf.active_memtable().region().occupancy().used_space();
        }, std::plus<int64_t>());
    });
@@ -302,7 +304,7 @@ void set_column_family(http_context& ctx, routes& r) {
    });

    cf::get_estimated_row_count.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], 0, [](column_family& cf) {
+        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](column_family& cf) {
            uint64_t res = 0;
            for (auto i: *cf.get_sstables() ) {
                res += i.second->get_stats_metadata().estimated_row_size.count();
@@ -422,11 +424,11 @@ void set_column_family(http_context& ctx, routes& r) {
    });

    cf::get_max_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], 0, max_row_size, max_int64);
+        return map_reduce_cf(ctx, req->param["name"], int64_t(0), max_row_size, max_int64);
    });

    cf::get_all_max_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, 0, max_row_size, max_int64);
+        return map_reduce_cf(ctx, int64_t(0), max_row_size, max_int64);
    });

    cf::get_mean_row_size.set(r, [&ctx] (std::unique_ptr<request> req) {
@@ -537,20 +539,20 @@ void set_column_family(http_context& ctx, routes& r) {
        }, std::plus<uint64_t>());
    });

-    cf::get_index_summary_off_heap_memory_used.set(r, [] (std::unique_ptr<request> req) {
-        //TBD
-        // FIXME
-        // We are missing the off heap memory calculation
-        // Return 0 is the wrong value. It's a work around
-        // until the memory calculation will be available
-        //auto id = get_uuid(req->param["name"], ctx.db.local());
-        return make_ready_future<json::json_return_type>(0);
+    cf::get_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) { // total summary memory footprint of the named column family
+        return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (column_family& cf) {
+            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
+                return s + sst.second->get_summary().memory_footprint(); // must add to the running total: std::accumulate keeps only what the functor returns
+            });
+        }, std::plus<uint64_t>());
    });

-    cf::get_all_index_summary_off_heap_memory_used.set(r, [] (std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
-        return make_ready_future<json::json_return_type>(0);
+    cf::get_all_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<request> req) { // total summary memory footprint over all column families
+        return map_reduce_cf(ctx, uint64_t(0), [] (column_family& cf) {
+            return std::accumulate(cf.get_sstables()->begin(), cf.get_sstables()->end(), uint64_t(0), [](uint64_t s, auto& sst) {
+                return s + sst.second->get_summary().memory_footprint(); // must add to the running total: std::accumulate keeps only what the functor returns
+            });
+        }, std::plus<uint64_t>());
    });

    cf::get_compression_metadata_off_heap_memory_used.set(r, [] (std::unique_ptr<request> req) {
@@ -589,11 +591,16 @@ void set_column_family(http_context& ctx, routes& r) {
        return make_ready_future<json::json_return_type>(0);
    });

-    cf::get_true_snapshots_size.set(r, [] (std::unique_ptr<request> req) {
-        //TBD
-        // FIXME
-        //auto id = get_uuid(req->param["name"], ctx.db.local());
-        return make_ready_future<json::json_return_type>(0);
+    cf::get_true_snapshots_size.set(r, [&ctx] (std::unique_ptr<request> req) {
+        auto uuid = get_uuid(req->param["name"], ctx.db.local());
+        return ctx.db.local().find_column_family(uuid).get_snapshot_details().then([](
+                const std::unordered_map<sstring, column_family::snapshot_details>& sd) {
+            int64_t res = 0;
+            for (auto i : sd) {
+                res += i.second.total;
+            }
+            return make_ready_future<json::json_return_type>(res);
+        });
    });

    cf::get_all_true_snapshots_size.set(r, [] (std::unique_ptr<request> req) {
@@ -616,25 +623,25 @@ void set_column_family(http_context& ctx, routes& r) {
    });

    cf::get_row_cache_hit.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], 0, [](const column_family& cf) {
+        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](const column_family& cf) {
            return cf.get_row_cache().stats().hits;
        }, std::plus<int64_t>());
    });

    cf::get_all_row_cache_hit.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, 0, [](const column_family& cf) {
+        return map_reduce_cf(ctx, int64_t(0), [](const column_family& cf) {
            return cf.get_row_cache().stats().hits;
        }, std::plus<int64_t>());
    });

    cf::get_row_cache_miss.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, req->param["name"], 0, [](const column_family& cf) {
+        return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](const column_family& cf) {
            return cf.get_row_cache().stats().misses;
        }, std::plus<int64_t>());
    });

    cf::get_all_row_cache_miss.set(r, [&ctx] (std::unique_ptr<request> req) {
-        return map_reduce_cf(ctx, 0, [](const column_family& cf) {
+        return map_reduce_cf(ctx, int64_t(0), [](const column_family& cf) {
            return cf.get_row_cache().stats().misses;
        }, std::plus<int64_t>());

--- a/api/compaction_manager.cc
+++ b/api/compaction_manager.cc
@@ -21,16 +21,17 @@

 #include "compaction_manager.hh"
 #include "api/api-doc/compaction_manager.json.hh"
+#include "db/system_keyspace.hh"

 namespace api {

 using namespace scollectd;
 namespace cm = httpd::compaction_manager_json;
-
+using namespace json;

 static future<json::json_return_type> get_cm_stats(http_context& ctx,
        int64_t compaction_manager::stats::*f) {
-    return ctx.db.map_reduce0([&](database& db) {
+    return ctx.db.map_reduce0([f](database& db) {
        return db.get_compaction_manager().get_stats().*f;
    }, int64_t(0), std::plus<int64_t>()).then([](const int64_t& res) {
        return make_ready_future<json::json_return_type>(res);
@@ -38,29 +39,38 @@ static future<json::json_return_type> get_cm_stats(http_context& ctx,
 }

 void set_compaction_manager(http_context& ctx, routes& r) {
-    cm::get_compactions.set(r, [] (std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
-        std::vector<cm::jsonmap> map;
-        return make_ready_future<json::json_return_type>(map);
-    });
+    cm::get_compactions.set(r, [&ctx] (std::unique_ptr<request> req) {
+        return ctx.db.map_reduce0([](database& db) {
+            std::vector<cm::summary> summaries;
+            const compaction_manager& cm = db.get_compaction_manager();

-    cm::get_compaction_summary.set(r, [] (std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
-        std::vector<sstring> res;
-        return make_ready_future<json::json_return_type>(res);
+            for (const auto& c : cm.get_compactions()) {
+                cm::summary s;
+                s.ks = c->ks;
+                s.cf = c->cf;
+                s.unit = "keys";
+                s.task_type = "compaction";
+                s.completed = c->total_keys_written;
+                s.total = c->total_partitions;
+                summaries.push_back(std::move(s));
+            }
+            return summaries;
+        }, std::vector<cm::summary>(), concat<cm::summary>).then([](const std::vector<cm::summary>& res) {
+            return make_ready_future<json::json_return_type>(res);
+        });
    });

    cm::force_user_defined_compaction.set(r, [] (std::unique_ptr<request> req) {
        //TBD
-        unimplemented();
-        return make_ready_future<json::json_return_type>("");
+        // FIXME
+        warn(unimplemented::cause::API);
+        return make_ready_future<json::json_return_type>(json_void());
    });

    cm::stop_compaction.set(r, [] (std::unique_ptr<request> req) {
        //TBD
-        unimplemented();
+        // FIXME
+        warn(unimplemented::cause::API);
        return make_ready_future<json::json_return_type>("");
    });

@@ -81,14 +91,42 @@ void set_compaction_manager(http_context& ctx, routes& r) {

    cm::get_bytes_compacted.set(r, [] (std::unique_ptr<request> req) {
        //TBD
-        unimplemented();
+        // FIXME
+        warn(unimplemented::cause::API);
        return make_ready_future<json::json_return_type>(0);
    });

    cm::get_compaction_history.set(r, [] (std::unique_ptr<request> req) {
+        return db::system_keyspace::get_compaction_history().then([] (std::vector<db::system_keyspace::compaction_history_entry> history) {
+            std::vector<cm::history> res;
+            res.reserve(history.size());
+
+            for (auto& entry : history) {
+                cm::history h;
+                h.id = entry.id.to_sstring();
+                h.ks = std::move(entry.ks);
+                h.cf = std::move(entry.cf);
+                h.compacted_at = entry.compacted_at;
+                h.bytes_in = entry.bytes_in;
+                h.bytes_out =  entry.bytes_out;
+                for (auto it : entry.rows_merged) {
+                    httpd::compaction_manager_json::row_merged e;
+                    e.key = it.first;
+                    e.value = it.second;
+                    h.rows_merged.push(std::move(e));
+                }
+                res.push_back(std::move(h));
+            }
+
+            return make_ready_future<json::json_return_type>(res);
+        });
+    });
+
+    cm::get_compaction_info.set(r, [] (std::unique_ptr<request> req) {
        //TBD
-        unimplemented();
-        std::vector<cm::history> res;
+        // FIXME
+        warn(unimplemented::cause::API);
+        std::vector<cm::compaction_info> res;
        return make_ready_future<json::json_return_type>(res);
    });

--- a/api/failure_detector.cc
+++ b/api/failure_detector.cc
@@ -22,15 +22,33 @@
 #include "failure_detector.hh"
 #include "api/api-doc/failure_detector.json.hh"
 #include "gms/failure_detector.hh"
+#include "gms/application_state.hh"
+#include "gms/gossiper.hh"
 namespace api {

 namespace fd = httpd::failure_detector_json;

 void set_failure_detector(http_context& ctx, routes& r) {
    fd::get_all_endpoint_states.set(r, [](std::unique_ptr<request> req) {
-        return gms::get_all_endpoint_states().then([](const sstring& str) {
-            return make_ready_future<json::json_return_type>(str);
-        });
+        std::vector<fd::endpoint_state> res;
+        for (auto i : gms::get_local_gossiper().endpoint_state_map) {
+            fd::endpoint_state val;
+            val.addrs = boost::lexical_cast<std::string>(i.first);
+            val.is_alive = i.second.is_alive();
+            val.generation = i.second.get_heart_beat_state().get_generation();
+            val.version = i.second.get_heart_beat_state().get_heart_beat_version();
+            val.update_time = i.second.get_update_timestamp().time_since_epoch().count();
+            for (auto a : i.second.get_application_state_map()) {
+                fd::version_value version_val;
+                // We return the enum index and not its name to stay compatible with origin:
+                // the state indexes are static, but the names can change.
+                version_val.application_state = static_cast<std::underlying_type<gms::application_state>::type>(a.first);
+                version_val.value = a.second.value;
+                val.application_state.push(version_val);
+            }
+            res.push_back(val);
+        }
+        return make_ready_future<json::json_return_type>(res);
    });

    fd::get_up_endpoint_count.set(r, [](std::unique_ptr<request> req) {
--- a/api/messaging_service.cc
+++ b/api/messaging_service.cc
@@ -41,8 +41,8 @@ std::vector<message_counter> map_to_message_counters(
    std::vector<message_counter> res;
    for (auto i : map) {
        res.push_back(message_counter());
-        res.back().ip = boost::lexical_cast<sstring>(i.first);
-        res.back().count = i.second;
+        res.back().key = boost::lexical_cast<sstring>(i.first);
+        res.back().value = i.second;
    }
    return res;
 }
@@ -70,12 +70,39 @@ future_json_function get_client_getter(std::function<uint64_t(const shard_info&)
    };
 }

+future_json_function get_server_getter(std::function<uint64_t(const rpc::stats&)> f) { // builds a handler reporting f(stats) per peer address for server-side connections
+    return [f](std::unique_ptr<request> req) {
+        using map_type = std::unordered_map<gms::inet_address, uint64_t>;
+        auto get_shard_map = [f](messaging_service& ms) {
+            map_type map; // same type as the reduction's initial value below
+            ms.foreach_server_connection_stats([&map, f] (const rpc::client_info& info, const rpc::stats& stats) {
+                map[gms::inet_address(net::ipv4_address(info.addr))] = f(stats); // NOTE(review): '=' overwrites when one address has several connections - confirm intended
+            });
+            return map;
+        };
+        return get_messaging_service().map_reduce0(get_shard_map, map_type(), map_sum<map_type>).
+                then([](map_type&& map) {
+            return make_ready_future<json::json_return_type>(map_to_message_counters(map));
+        });
+    };
+}
+
 void set_messaging_service(http_context& ctx, routes& r) {
+    get_timeout_messages.set(r, get_client_getter([](const shard_info& c) {
+        return c.get_stats().timeout;
+    }));

    get_sent_messages.set(r, get_client_getter([](const shard_info& c) {
        return c.get_stats().sent_messages;
    }));

+    get_dropped_messages.set(r, get_client_getter([](const shard_info& c) {
+        // We don't have the same drop message mechanism
+        // as origin has.
+        // hence we can always return 0
+        return 0;
+    }));
+
    get_exception_messages.set(r, get_client_getter([](const shard_info& c) {
        return c.get_stats().exception_received;
    }));
@@ -84,11 +111,19 @@ void set_messaging_service(http_context& ctx, routes& r) {
        return c.get_stats().pending;
    }));

-    get_respond_pending_messages.set(r, get_client_getter([](const shard_info& c) {
-        return c.get_stats().wait_reply;
+    get_respond_pending_messages.set(r, get_server_getter([](const rpc::stats& c) {
+        return c.pending;
    }));

-    get_dropped_messages.set(r, [](std::unique_ptr<request> req) {
+    get_respond_completed_messages.set(r, get_server_getter([](const rpc::stats& c) {
+        return c.sent_messages;
+    }));
+
+    get_version.set(r, [](const_req req) {
+        return net::get_local_messaging_service().get_raw_version(req.get_query_param("addr"));
+    });
+
+    get_dropped_messages_by_ver.set(r, [](std::unique_ptr<request> req) {
        shared_ptr<std::vector<uint64_t>> map = make_shared<std::vector<uint64_t>>(num_verb, 0);

        return net::get_messaging_service().map_reduce([map](const uint64_t* local_map) mutable {
--- a/api/storage_proxy.cc
+++ b/api/storage_proxy.cc
@@ -201,22 +201,16 @@ void set_storage_proxy(http_context& ctx, routes& r) {
        return make_ready_future<json::json_return_type>(json_void());
    });

-    sp::get_read_repair_attempted.set(r, [](std::unique_ptr<request> req)  {
-        //TBD
-        unimplemented();
-        return make_ready_future<json::json_return_type>(0);
+    sp::get_read_repair_attempted.set(r, [&ctx](std::unique_ptr<request> req)  {
+        return sum_stats(ctx.sp, &proxy::stats::read_repair_attempts);
    });

-    sp::get_read_repair_repaired_blocking.set(r, [](std::unique_ptr<request> req)  {
-        //TBD
-        unimplemented();
-        return make_ready_future<json::json_return_type>(0);
+    sp::get_read_repair_repaired_blocking.set(r, [&ctx](std::unique_ptr<request> req)  {
+        return sum_stats(ctx.sp, &proxy::stats::read_repair_repaired_blocking);
    });

-    sp::get_read_repair_repaired_background.set(r, [](std::unique_ptr<request> req)  {
-        //TBD
-        unimplemented();
-        return make_ready_future<json::json_return_type>(0);
+    sp::get_read_repair_repaired_background.set(r, [&ctx](std::unique_ptr<request> req)  {
+        return sum_stats(ctx.sp, &proxy::stats::read_repair_repaired_background);
    });

    sp::get_schema_versions.set(r, [](std::unique_ptr<request> req)  {
--- a/api/storage_service.cc
+++ b/api/storage_service.cc
@@ -43,6 +43,29 @@ static sstring validate_keyspace(http_context& ctx, const parameters& param) {
    throw bad_param_exception("Keyspace " + param["keyspace"] + " Does not exist");
 }

+
+static std::vector<ss::token_range> describe_ring(const sstring& keyspace) {
+    std::vector<ss::token_range> res;
+    for (auto d : service::get_local_storage_service().describe_ring(keyspace)) {
+        ss::token_range r;
+        r.start_token = d._start_token;
+        r.end_token = d._end_token;
+        r.endpoints = d._endpoints;
+        r.rpc_endpoints = d._rpc_endpoints;
+        for (auto det : d._endpoint_details) {
+            ss::endpoint_detail ed;
+            ed.host = det._host;
+            ed.datacenter = det._datacenter;
+            if (det._rack != "") {
+                ed.rack = det._rack;
+            }
+            r.endpoint_details.push(ed);
+        }
+        res.push_back(r);
+    }
+    return res;
+}
+
 void set_storage_service(http_context& ctx, routes& r) {
    ss::local_hostid.set(r, [](std::unique_ptr<request> req) {
        return db::system_keyspace::get_local_host_id().then([](const utils::UUID& id) {
@@ -66,7 +89,7 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::get_token_endpoint.set(r, [] (const_req req) {
-        auto token_to_ep = service::get_local_storage_service().get_token_metadata().get_token_to_endpoint();
+        auto token_to_ep = service::get_local_storage_service().get_token_to_endpoint_map();
        std::vector<storage_service_json::mapper> res;
        return map_to_key_value(token_to_ep, res);
    });
@@ -125,12 +148,13 @@ void set_storage_service(http_context& ctx, routes& r) {
        return make_ready_future<json::json_return_type>(res);
    });

-    ss::describe_ring_jmx.set(r, [&ctx](std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
-        auto keyspace = validate_keyspace(ctx, req->param);
-        std::vector<sstring> res;
-        return make_ready_future<json::json_return_type>(res);
+    ss::describe_any_ring.set(r, [&ctx](const_req req) {
+        return describe_ring("");
+    });
+
+    ss::describe_ring.set(r, [&ctx](const_req req) {
+        auto keyspace = validate_keyspace(ctx, req.param);
+        return describe_ring(keyspace);
    });

    ss::get_host_id_map.set(r, [](const_req req) {
@@ -145,8 +169,14 @@ void set_storage_service(http_context& ctx, routes& r) {

    ss::get_load_map.set(r, [] (std::unique_ptr<request> req) {
        return service::get_local_storage_service().get_load_map().then([] (auto&& load_map) {
-            std::vector<ss::mapper> res;
-            return make_ready_future<json::json_return_type>(map_to_key_value(load_map, res));
+            std::vector<ss::double_mapper> res;
+            for (auto i : load_map) {
+                ss::double_mapper val;
+                val.key = i.first;
+                val.value = i.second;
+                res.push_back(val);
+            }
+            return make_ready_future<json::json_return_type>(res);
        });
    });

@@ -157,15 +187,10 @@ void set_storage_service(http_context& ctx, routes& r) {
        });
    });

-    ss::get_natural_endpoints.set(r, [&ctx](std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
-        auto keyspace = validate_keyspace(ctx, req->param);
-        auto column_family = req->get_query_param("cf");
-        auto key = req->get_query_param("key");
-
-        std::vector<sstring> res;
-        return make_ready_future<json::json_return_type>(res);
+    ss::get_natural_endpoints.set(r, [&ctx](const_req req) {
+        auto keyspace = validate_keyspace(ctx, req.param);
+        return container_to_vec(service::get_local_storage_service().get_natural_endpoints(keyspace, req.get_query_param("cf"),
+                req.get_query_param("key")));
    });

    ss::get_snapshot_details.set(r, [](std::unique_ptr<request> req) {
@@ -247,10 +272,14 @@ void set_storage_service(http_context& ctx, routes& r) {

    ss::force_keyspace_cleanup.set(r, [&ctx](std::unique_ptr<request> req) {
        //TBD
-        unimplemented();
+        // FIXME
+        // the nodetool clean up is used in many tests
+        // this workaround will let it work until
+        // a cleanup is implemented
+        warn(unimplemented::cause::API);
        auto keyspace = validate_keyspace(ctx, req->param);
        auto column_family = req->get_query_param("cf");
-        return make_ready_future<json::json_return_type>(json_void());
+        return make_ready_future<json::json_return_type>(0);
    });

    ss::scrub.set(r, [&ctx](std::unique_ptr<request> req) {
@@ -289,18 +318,14 @@ void set_storage_service(http_context& ctx, routes& r) {


    ss::repair_async.set(r, [&ctx](std::unique_ptr<request> req) {
-        // Currently, we get all the repair options encoded in a single
-        // "options" option, and split it to a map using the "," and ":"
-        // delimiters. TODO: consider if it doesn't make more sense to just
-        // take all the query parameters as this map and pass it to the repair
-        // function.
+        static std::vector<sstring> options = {"primaryRange", "parallelism", "incremental",
+                "jobThreads", "ranges", "columnFamilies", "dataCenters", "hosts", "trace"};
        std::unordered_map<sstring, sstring> options_map;
-        for (auto s : split(req->get_query_param("options"), ",")) {
-            auto kv = split(s, ":");
-            if (kv.size() != 2) {
-                throw httpd::bad_param_exception("malformed async repair options");
+        for (auto o : options) {
+            auto s = req->get_query_param(o);
+            if (s != "") {
+                options_map[o] = s;
            }
-            options_map.emplace(std::move(kv[0]), std::move(kv[1]));
        }

        // The repair process is asynchronous: repair_start only starts it and
@@ -338,11 +363,11 @@ void set_storage_service(http_context& ctx, routes& r) {
        });
    });

-    ss::move.set(r, [](std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
+    ss::move.set(r, [] (std::unique_ptr<request> req) {
        auto new_token = req->get_query_param("new_token");
-        return make_ready_future<json::json_return_type>(json_void());
+        return service::get_local_storage_service().move(new_token).then([] {
+            return make_ready_future<json::json_return_type>(json_void());
+        });
    });

    ss::remove_node.set(r, [](std::unique_ptr<request> req) {
@@ -392,15 +417,18 @@ void set_storage_service(http_context& ctx, routes& r) {
    });

    ss::get_drain_progress.set(r, [](std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
-        return make_ready_future<json::json_return_type>("");
+        return service::get_storage_service().map_reduce(adder<service::storage_service::drain_progress>(), [] (auto& ss) {
+            return ss.get_drain_progress();
+        }).then([] (auto&& progress) {
+            auto progress_str = sprint("Drained %s/%s ColumnFamilies", progress.remaining_cfs, progress.total_cfs);
+            return make_ready_future<json::json_return_type>(std::move(progress_str));
+        });
    });

    ss::drain.set(r, [](std::unique_ptr<request> req) {
-        //TBD
-        unimplemented();
-        return make_ready_future<json::json_return_type>(json_void());
+        return service::get_local_storage_service().drain().then([] {
+            return make_ready_future<json::json_return_type>(json_void());
+        });
    });
    ss::truncate.set(r, [&ctx](std::unique_ptr<request> req) {
        //TBD
@@ -495,8 +523,10 @@ void set_storage_service(http_context& ctx, routes& r) {
        });
    });

-    ss::is_joined.set(r, [](const_req req) {
-        return service::get_local_storage_service().is_joined();
+    ss::is_joined.set(r, [] (std::unique_ptr<request> req) {
+        return service::get_local_storage_service().is_joined().then([] (bool is_joined) {
+            return make_ready_future<json::json_return_type>(is_joined);
+        });
    });

    ss::set_stream_throughput_mb_per_sec.set(r, [](std::unique_ptr<request> req) {
@@ -725,17 +755,19 @@ void set_storage_service(http_context& ctx, routes& r) {
        return make_ready_future<json::json_return_type>(0);
    });

-    ss::get_ownership.set(r, [](const_req req) {
-        auto tokens = service::get_local_storage_service().get_ownership();
-        std::vector<storage_service_json::mapper> res;
-        return map_to_key_value(tokens, res);
+    ss::get_ownership.set(r, [] (std::unique_ptr<request> req) {
+        return service::get_local_storage_service().get_ownership().then([] (auto&& ownership) {
+            std::vector<storage_service_json::mapper> res;
+            return make_ready_future<json::json_return_type>(map_to_key_value(ownership, res));
+        });
    });

-    ss::get_effective_ownership.set(r, [&ctx](const_req req) {
-        auto tokens = service::get_local_storage_service().effective_ownership(
-                (req.param["keyspace"] == "null")? "" : validate_keyspace(ctx, req.param));
-        std::vector<storage_service_json::mapper> res;
-        return map_to_key_value(tokens, res);
+    ss::get_effective_ownership.set(r, [&ctx] (std::unique_ptr<request> req) {
+        auto keyspace_name = req->param["keyspace"] == "null" ? "" : validate_keyspace(ctx, req->param);
+        return service::get_local_storage_service().effective_ownership(keyspace_name).then([] (auto&& ownership) {
+            std::vector<storage_service_json::mapper> res;
+            return make_ready_future<json::json_return_type>(map_to_key_value(ownership, res));
+        });
    });
 }

--- a/atomic_cell.hh
+++ b/atomic_cell.hh
@@ -234,6 +234,8 @@ public:
    friend std::ostream& operator<<(std::ostream& os, const atomic_cell& ac);
 };

+class collection_mutation_view;
+
 // Represents a mutation of a collection.  Actual format is determined by collection type,
 // and is:
 //   set:  list of atomic_cell
@@ -241,20 +243,30 @@ public:
 //   list: tbd, probably ugly
 class collection_mutation {
 public:
-    struct view {
-        bytes_view data;
-        bytes_view serialize() const { return data; }
-        static view from_bytes(bytes_view v) { return { v }; }
-    };
-    struct one {
-        managed_bytes data;
-        one() {}
-        one(managed_bytes b) : data(std::move(b)) {}
-        one(view v) : data(v.data) {}
-        operator view() const { return { data }; }
-    };
+    managed_bytes data;
+    collection_mutation() {}
+    collection_mutation(managed_bytes b) : data(std::move(b)) {}
+    collection_mutation(collection_mutation_view v);
+    operator collection_mutation_view() const;
 };

+class collection_mutation_view {
+public:
+    bytes_view data;
+    bytes_view serialize() const { return data; }
+    static collection_mutation_view from_bytes(bytes_view v) { return { v }; }
+};
+
+inline
+collection_mutation::collection_mutation(collection_mutation_view v)
+        : data(v.data) {
+}
+
+inline
+collection_mutation::operator collection_mutation_view() const {
+    return { data };
+}
+
 namespace db {
 template<typename T>
 class serializer;
@@ -274,15 +286,15 @@ public:
    atomic_cell_or_collection(atomic_cell ac) : _data(std::move(ac._data)) {}
    static atomic_cell_or_collection from_atomic_cell(atomic_cell data) { return { std::move(data._data) }; }
    atomic_cell_view as_atomic_cell() const { return atomic_cell_view::from_bytes(_data); }
-    atomic_cell_or_collection(collection_mutation::one cm) : _data(std::move(cm.data)) {}
+    atomic_cell_or_collection(collection_mutation cm) : _data(std::move(cm.data)) {}
    explicit operator bool() const {
        return !_data.empty();
    }
-    static atomic_cell_or_collection from_collection_mutation(collection_mutation::one data) {
+    static atomic_cell_or_collection from_collection_mutation(collection_mutation data) {
        return std::move(data.data);
    }
-    collection_mutation::view as_collection_mutation() const {
-        return collection_mutation::view{_data};
+    collection_mutation_view as_collection_mutation() const {
+        return collection_mutation_view{_data};
    }
    bytes_view serialize() const {
        return _data;
@@ -290,6 +302,12 @@ public:
    bool operator==(const atomic_cell_or_collection& other) const {
        return _data == other._data;
    }
+    void linearize() {
+        _data.linearize();
+    }
+    void unlinearize() {
+        _data.scatter();
+    }
    friend std::ostream& operator<<(std::ostream&, const atomic_cell_or_collection&);
 };

--- a/bytes_ostream.hh
+++ b/bytes_ostream.hh
@@ -33,8 +33,10 @@
 *
 */
 class bytes_ostream {
+public:
    using size_type = bytes::size_type;
    using value_type = bytes::value_type;
+private:
    static_assert(sizeof(value_type) == 1, "value_type is assumed to be one byte long");
    struct chunk {
        // FIXME: group fragment pointers to reduce pointer chasing when packetizing
@@ -117,13 +119,13 @@ private:
        };
    }
 public:
-    bytes_ostream()
+    bytes_ostream() noexcept
        : _begin()
        , _current(nullptr)
        , _size(0)
    { }

-    bytes_ostream(bytes_ostream&& o)
+    bytes_ostream(bytes_ostream&& o) noexcept
        : _begin(std::move(o._begin))
        , _current(o._current)
        , _size(o._size)
@@ -148,7 +150,7 @@ public:
        return *this;
    }

-    bytes_ostream& operator=(bytes_ostream&& o) {
+    bytes_ostream& operator=(bytes_ostream&& o) noexcept {
        _size = o._size;
        _begin = std::move(o._begin);
        _current = o._current;
--- a/caching_options.hh
+++ b/caching_options.hh
@@ -82,6 +82,12 @@ public:
        }
        return caching_options(k, r);
    }
+    bool operator==(const caching_options& other) const {
+        return _key_cache == other._key_cache && _row_cache == other._row_cache;
+    }
+    bool operator!=(const caching_options& other) const {
+        return !(*this == other);
+    }
 };


--- a/compaction_strategy.hh
+++ b/compaction_strategy.hh
@@ -63,16 +63,18 @@ public:
    }

    static compaction_strategy_type type(const sstring& name) {
-        if (name == "NullCompactionStrategy") {
+        auto pos = name.find("org.apache.cassandra.db.compaction.");
+        sstring short_name = (pos == sstring::npos) ? name : name.substr(pos + 35);
+        if (short_name == "NullCompactionStrategy") {
            return compaction_strategy_type::null;
-        } else if (name == "MajorCompactionStrategy") {
+        } else if (short_name == "MajorCompactionStrategy") {
            return compaction_strategy_type::major;
-        } else if (name == "SizeTieredCompactionStrategy") {
+        } else if (short_name == "SizeTieredCompactionStrategy") {
            return compaction_strategy_type::size_tiered;
-        } else if (name == "LeveledCompactionStrategy") {
+        } else if (short_name == "LeveledCompactionStrategy") {
            return compaction_strategy_type::leveled;
        } else {
-            throw exceptions::configuration_exception(sprint("Unable to find compaction strategy class 'org.apache.cassandra.db.compaction.%s", name));
+            throw exceptions::configuration_exception(sprint("Unable to find compaction strategy class '%s'", name));
        }
    }

--- a/compound.hh
+++ b/compound.hh
@@ -68,7 +68,7 @@ public:
        , _byte_order_equal(std::all_of(_types.begin(), _types.end(), [] (auto t) {
                return t->is_byte_order_equal();
            }))
-        , _byte_order_comparable(_types.size() == 1 && _types[0]->is_byte_order_comparable())
+        , _byte_order_comparable(!is_prefixable && _types.size() == 1 && _types[0]->is_byte_order_comparable())
        , _is_reversed(_types.size() == 1 && _types[0]->is_reversed())
    { }

@@ -159,7 +159,7 @@ public:
        }
        return ::serialize_value(*this, values);
    }
-    bytes serialize_value_deep(const std::vector<boost::any>& values) {
+    bytes serialize_value_deep(const std::vector<data_value>& values) {
        // TODO: Optimize
        std::vector<bytes> partial;
        partial.reserve(values.size());
@@ -278,10 +278,10 @@ public:
            });
    }
    bytes from_string(sstring_view s) {
-        throw std::runtime_error("not implemented");
+        throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
    }
    sstring to_string(const bytes& b) {
-        throw std::runtime_error("not implemented");
+        throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
    }
    // Retruns true iff given prefix has no missing components
    bool is_full(bytes_view v) const {
--- a/compress.hh
+++ b/compress.hh
@@ -114,6 +114,14 @@ public:
        }
        return opts;
    }
+    bool operator==(const compression_parameters& other) const {
+        return _compressor == other._compressor
+               && _chunk_length == other._chunk_length
+               && _crc_check_chance == other._crc_check_chance;
+    }
+    bool operator!=(const compression_parameters& other) const {
+        return !(*this == other);
+    }
 private:
    void validate_options(const std::map<sstring, sstring>& options) {
        // currently, there are no options specific to a particular compressor
--- a/conf/cassandra-rackdc.properties
+++ b/conf/cassandra-rackdc.properties
--- a/conf/scylla.yaml
+++ b/conf/scylla.yaml
@@ -409,15 +409,16 @@ partitioner: org.apache.cassandra.dht.Murmur3Partitioner
 #   offheap_objects: native memory, eliminating nio buffer heap overhead
 # memtable_allocation_type: heap_buffers

-# Total space to use for commitlogs.  Since commitlog segments are
-# mmapped, and hence use up address space, the default size is 32
-# on 32-bit JVMs, and 8192 on 64-bit JVMs.
+# Total space to use for commitlogs.
 #
 # If space gets above this value (it will round up to the next nearest
 # segment multiple), Scylla will flush every dirty CF in the oldest
 # segment and remove it.  So a small total commitlog space will tend
 # to cause more flush activity on less-active columnfamilies.
-commitlog_total_space_in_mb: 8192
+#
+# A value of -1 (default) automatically sets it to the total amount of memory
+# available to Scylla.
+commitlog_total_space_in_mb: -1

 # This sets the amount of memtable flush writer threads.  These will
 # be blocked by disk io, and each one will hold a memtable in memory
@@ -781,40 +782,25 @@ commitlog_total_space_in_mb: 8192
 # the request scheduling. Currently the only valid option is keyspace.
 # request_scheduler_id: keyspace

-# Enable or disable inter-node encryption
-# Default settings are TLS v1, RSA 1024-bit keys (it is imperative that
-# users generate their own keys) TLS_RSA_WITH_AES_128_CBC_SHA as the cipher
-# suite for authentication, key exchange and encryption of the actual data transfers.
-# Use the DHE/ECDHE ciphers if running in FIPS 140 compliant mode.
-# NOTE: No custom encryption options are enabled at the moment
+# Enable or disable inter-node encryption.
+# You must also generate keys and provide the appropriate key and trust store locations and passwords.
+# No custom encryption options are currently enabled. The available options are:
+#
 # The available internode options are : all, none, dc, rack
-#
-# If set to dc cassandra will encrypt the traffic between the DCs
-# If set to rack cassandra will encrypt the traffic between the racks
-#
-# The passwords used in these options must match the passwords used when generating
-# the keystore and truststore.  For instructions on generating these files, see:
-# http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore
+# If set to dc, Scylla will encrypt the traffic between the DCs
+# If set to rack, Scylla will encrypt the traffic between the racks
 #
 # server_encryption_options:
 #    internode_encryption: none
-#    keystore: conf/.keystore
-#    keystore_password: cassandra
-#    truststore: conf/.truststore
-#    truststore_password: cassandra
-
-    # More advanced defaults below:
-    # protocol: TLS
-    # algorithm: SunX509
-    # store_type: JKS
-    # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]
-    # require_client_auth: false
+#    certificate: conf/scylla.crt
+#    keyfile: conf/scylla.key
+#    truststore: <none, use system trust>

 # enable or disable client/server encryption.
 # client_encryption_options:
 #    enabled: false
-#    keystore: conf/.keystore
-#    keystore_password: cassandra
+#    certificate: conf/scylla.crt
+#    keyfile: conf/scylla.key

    # require_client_auth: false
    # Set trustore and truststore_password if require_client_auth is true
@@ -838,3 +824,17 @@ commitlog_total_space_in_mb: 8192
 # reducing overhead from the TCP protocol itself, at the cost of increasing
 # latency if you block for cross-datacenter responses.
 # inter_dc_tcp_nodelay: false
+
+# Relaxation of environment checks.
+#
+# Scylla places certain requirements on its environment.  If these requirements are
+# not met, performance and reliability can be degraded.
+#
+# These requirements include:
+#    - A filesystem with good support for asynchronous I/O (AIO). Currently,
+#      this means XFS.
+#
+# false: strict environment checks are in place; do not start if they are not met.
+# true: relaxed environment checks; performance and reliability may degrade.
+#
+# developer_mode: false
--- a/configure.py
+++ b/configure.py
@@ -183,6 +183,7 @@ scylla_tests = [
    'tests/managed_vector_test',
    'tests/crc_test',
    'tests/flush_queue_test',
+    'tests/dynamic_bitset_test',
 ]

 apps = [
@@ -280,6 +281,8 @@ scylla_core = (['database.cc',
                 'cql3/statements/schema_altering_statement.cc',
                 'cql3/statements/ks_prop_defs.cc',
                 'cql3/statements/modification_statement.cc',
+                 'cql3/statements/parsed_statement.cc',
+                 'cql3/statements/property_definitions.cc',
                 'cql3/statements/update_statement.cc',
                 'cql3/statements/delete_statement.cc',
                 'cql3/statements/batch_statement.cc',
@@ -339,6 +342,7 @@ scylla_core = (['database.cc',
                 'utils/rate_limiter.cc',
                 'utils/compaction_manager.cc',
                 'utils/file_lock.cc',
+                 'utils/dynamic_bitset.cc',
                 'gms/version_generator.cc',
                 'gms/versioned_value.cc',
                 'gms/gossiper.cc',
@@ -374,6 +378,8 @@ scylla_core = (['database.cc',
                 'service/storage_service.cc',
                 'service/pending_range_calculator_service.cc',
                 'service/load_broadcaster.cc',
+                 'service/pager/paging_state.cc',
+                 'service/pager/query_pagers.cc',
                 'streaming/streaming.cc',
                 'streaming/stream_task.cc',
                 'streaming/stream_session.cc',
@@ -394,6 +400,7 @@ scylla_core = (['database.cc',
                 'streaming/messages/file_message_header.cc',
                 'streaming/messages/outgoing_file_message.cc',
                 'streaming/messages/incoming_file_message.cc',
+                 'streaming/stream_session_state.cc',
                 'gc_clock.cc',
                 'partition_slice_builder.cc',
                 'init.cc',
@@ -479,6 +486,7 @@ tests_not_using_seastar_test_framework = set([
    'tests/crc_test',
    'tests/perf/perf_sstable',
    'tests/managed_vector_test',
+    'tests/dynamic_bitset_test',
 ])

 for t in tests_not_using_seastar_test_framework:
@@ -495,7 +503,7 @@ deps['tests/sstable_test'] += ['tests/sstable_datafile_test.cc']
 deps['tests/bytes_ostream_test'] = ['tests/bytes_ostream_test.cc']
 deps['tests/UUID_test'] = ['utils/UUID_gen.cc', 'tests/UUID_test.cc']
 deps['tests/murmur_hash_test'] = ['bytes.cc', 'utils/murmur_hash.cc', 'tests/murmur_hash_test.cc']
-deps['tests/allocation_strategy_test'] = ['tests/allocation_strategy_test.cc', 'utils/logalloc.cc', 'log.cc']
+deps['tests/allocation_strategy_test'] = ['tests/allocation_strategy_test.cc', 'utils/logalloc.cc', 'log.cc', 'utils/dynamic_bitset.cc']

 warnings = [
    '-Wno-mismatched-tags',  # clang-only
--- a/cql3/Cql.g
+++ b/cql3/Cql.g
@@ -856,7 +856,7 @@ dropIndexStatement returns [DropIndexStatement expr]
  * TRUNCATE <CF>;
  */
 truncateStatement returns [::shared_ptr<truncate_statement> stmt]
-    : K_TRUNCATE cf=columnFamilyName { $stmt = ::make_shared<truncate_statement>(cf); }
+    : K_TRUNCATE (K_COLUMNFAMILY)? cf=columnFamilyName { $stmt = ::make_shared<truncate_statement>(cf); }
    ;

 #if 0
--- a/cql3/attributes.cc
+++ b/cql3/attributes.cc
@@ -80,7 +80,7 @@ int64_t attributes::get_timestamp(int64_t now, const query_options& options) {
    } catch (marshal_exception e) {
        throw exceptions::invalid_request_exception("Invalid timestamp value");
    }
-    return boost::any_cast<int64_t>(data_type_for<int64_t>()->deserialize(*tval));
+    return value_cast<int64_t>(data_type_for<int64_t>()->deserialize(*tval));
 }

 int32_t attributes::get_time_to_live(const query_options& options) {
@@ -99,7 +99,7 @@ int32_t attributes::get_time_to_live(const query_options& options) {
        throw exceptions::invalid_request_exception("Invalid TTL value");
    }

-    auto ttl = boost::any_cast<int32_t>(data_type_for<int32_t>()->deserialize(*tval));
+    auto ttl = value_cast<int32_t>(data_type_for<int32_t>()->deserialize(*tval));
    if (ttl < 0) {
        throw exceptions::invalid_request_exception("A TTL must be greater or equal to 0");
    }
--- a/cql3/column_identifier.hh
+++ b/cql3/column_identifier.hh
@@ -55,14 +55,11 @@ namespace cql3 {
 * Represents an identifer for a CQL column definition.
 * TODO : should support light-weight mode without text representation for when not interned
 */
-class column_identifier final : public selection::selectable /* implements IMeasurableMemory*/ {
+class column_identifier final : public selection::selectable {
 public:
    bytes bytes_;
 private:
    sstring _text;
-#if 0
-    private static final long EMPTY_SIZE = ObjectSizes.measure(new ColumnIdentifier("", true));
-#endif
 public:
    column_identifier(sstring raw_text, bool keep_case);

@@ -83,20 +80,6 @@ public:
    }

 #if 0
-    public long unsharedHeapSize()
-    {
-        return EMPTY_SIZE
-             + ObjectSizes.sizeOnHeapOf(bytes)
-             + ObjectSizes.sizeOf(text);
-    }
-
-    public long unsharedHeapSizeExcludingData()
-    {
-        return EMPTY_SIZE
-             + ObjectSizes.sizeOnHeapExcludingData(bytes)
-             + ObjectSizes.sizeOf(text);
-    }
-
    public ColumnIdentifier clone(AbstractAllocator allocator)
    {
        return new ColumnIdentifier(allocator.clone(bytes), text);
--- a/cql3/constants.cc
+++ b/cql3/constants.cc
@@ -160,7 +160,7 @@ void constants::deleter::execute(mutation& m, const exploded_clustering_prefix&
        auto ctype = static_pointer_cast<const collection_type_impl>(column.type);
        m.set_cell(prefix, column, atomic_cell_or_collection::from_collection_mutation(ctype->serialize_mutation_form(coll_m)));
    } else {
-        m.set_cell(prefix, column, params.make_dead_cell());
+        m.set_cell(prefix, column, make_dead_cell(params));
    }
 }

--- a/cql3/constants.hh
+++ b/cql3/constants.hh
@@ -197,7 +197,7 @@ public:

        virtual void execute(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) override {
            auto value = _t->bind_and_get(params._options);
-            auto cell = value ? params.make_cell(*value) : params.make_dead_cell();
+            auto cell = value ? make_cell(*value, params) : make_dead_cell(params);
            m.set_cell(prefix, column, std::move(cell));
        }
    };
--- a/cql3/functions/aggregate_fcts.hh
+++ b/cql3/functions/aggregate_fcts.hh
@@ -90,7 +90,7 @@ public:
        if (!values[0]) {
            return;
        }
-        _sum += boost::any_cast<Type>(data_type_for<Type>()->deserialize(*values[0]));
+        _sum += value_cast<Type>(data_type_for<Type>()->deserialize(*values[0]));
    }
 };

@@ -132,7 +132,7 @@ public:
            return;
        }
        ++_count;
-        _sum += boost::any_cast<Type>(data_type_for<Type>()->deserialize(*values[0]));
+        _sum += value_cast<Type>(data_type_for<Type>()->deserialize(*values[0]));
    }
 };

@@ -169,7 +169,7 @@ public:
        if (!values[0]) {
            return;
        }
-        auto val = boost::any_cast<Type>(data_type_for<Type>()->deserialize(*values[0]));
+        auto val = value_cast<Type>(data_type_for<Type>()->deserialize(*values[0]));
        if (!_max) {
            _max = val;
        } else {
@@ -216,7 +216,7 @@ public:
        if (!values[0]) {
            return;
        }
-        auto val = boost::any_cast<Type>(data_type_for<Type>()->deserialize(*values[0]));
+        auto val = value_cast<Type>(data_type_for<Type>()->deserialize(*values[0]));
        if (!_min) {
            _min = val;
        } else {
--- a/cql3/functions/functions.cc
+++ b/cql3/functions/functions.cc
@@ -50,6 +50,11 @@ functions::init() {
        if (type == cql3_type::varchar || type == cql3_type::blob) {
            continue;
        }
+        // counters are not supported yet
+        if (type->is_counter()) {
+            warn(unimplemented::cause::COUNTERS);
+            continue;
+        }

        declare(make_to_blob_function(type->get_type()));
        declare(make_from_blob_function(type->get_type()));
--- a/cql3/functions/time_uuid_fcts.hh
+++ b/cql3/functions/time_uuid_fcts.hh
@@ -71,10 +71,10 @@ make_min_timeuuid_fct() {
            return {};
        }
        auto ts_obj = timestamp_type->deserialize(*bb);
-        if (ts_obj.empty()) {
+        if (ts_obj.is_null()) {
            return {};
        }
-        auto ts = boost::any_cast<db_clock::time_point>(ts_obj);
+        auto ts = value_cast<db_clock::time_point>(ts_obj);
        auto uuid = utils::UUID_gen::min_time_UUID(ts.time_since_epoch().count());
        return {timeuuid_type->decompose(uuid)};
    });
@@ -91,10 +91,10 @@ make_max_timeuuid_fct() {
            return {};
        }
        auto ts_obj = timestamp_type->deserialize(*bb);
-        if (ts_obj.empty()) {
+        if (ts_obj.is_null()) {
            return {};
        }
-        auto ts = boost::any_cast<db_clock::time_point>(ts_obj);
+        auto ts = value_cast<db_clock::time_point>(ts_obj);
        auto uuid = utils::UUID_gen::max_time_UUID(ts.time_since_epoch().count());
        return {timeuuid_type->decompose(uuid)};
    });
--- a/cql3/functions/uuid_fcts.hh
+++ b/cql3/functions/uuid_fcts.hh
@@ -54,7 +54,7 @@ shared_ptr<function>
 make_uuid_fct() {
    return make_native_scalar_function<false>("uuid", uuid_type, {},
            [] (serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
-        return {uuid_type->decompose(boost::any(utils::make_random_uuid()))};
+        return {uuid_type->decompose(utils::make_random_uuid())};
    });
 }

--- a/cql3/lists.cc
+++ b/cql3/lists.cc
@@ -113,12 +113,12 @@ lists::value::from_serialized(bytes_view v, list_type type, serialization_format
        // Collections have this small hack that validate cannot be called on a serialized object,
        // but compose does the validation (so we're fine).
        // FIXME: deserializeForNativeProtocol()?!
-        auto l = boost::any_cast<list_type_impl::native_type>(type->deserialize(v, sf));
+        auto l = value_cast<list_type_impl::native_type>(type->deserialize(v, sf));
        std::vector<bytes_opt> elements;
        elements.reserve(l.size());
        for (auto&& element : l) {
            // elements can be null in lists that represent a set of IN values
-            elements.push_back(element.empty() ? bytes_opt() : bytes_opt(type->get_elements_type()->decompose(element)));
+            elements.push_back(element.is_null() ? bytes_opt() : bytes_opt(type->get_elements_type()->decompose(element)));
        }
        return value(std::move(elements));
    } catch (marshal_exception& e) {
@@ -274,7 +274,7 @@ lists::setter_by_index::execute(mutation& m, const exploded_clustering_prefix& p
    if (!existing_list_opt) {
        throw exceptions::invalid_request_exception("Attempted to set an element on a list which is null");
    }
-    collection_mutation::view existing_list_ser = *existing_list_opt;
+    collection_mutation_view existing_list_ser = *existing_list_opt;
    auto ltype = dynamic_pointer_cast<const list_type_impl>(column.type);
    collection_type_impl::mutation_view existing_list = ltype->deserialize_mutation_form(existing_list_ser);
    // we verified that index is an int32_type
@@ -339,7 +339,7 @@ lists::do_append(shared_ptr<term> t,
        } else {
            auto&& to_add = list_value->_elements;
            auto deref = [] (const bytes_opt& v) { return *v; };
-            auto&& newv = collection_mutation::one{list_type_impl::pack(
+            auto&& newv = collection_mutation{list_type_impl::pack(
                    boost::make_transform_iterator(to_add.begin(), deref),
                    boost::make_transform_iterator(to_add.end(), deref),
                    to_add.size(), serialization_format::internal())};
--- a/cql3/maps.cc
+++ b/cql3/maps.cc
@@ -114,30 +114,26 @@ maps::literal::validate_assignable_to(database& db, const sstring& keyspace, col

 assignment_testable::test_result
 maps::literal::test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) {
-    throw std::runtime_error("not implemented");
-#if 0
-    if (!(receiver.type instanceof MapType))
-        return AssignmentTestable.TestResult.NOT_ASSIGNABLE;
-
+    if (!dynamic_pointer_cast<const map_type_impl>(receiver->type)) {
+        return assignment_testable::test_result::NOT_ASSIGNABLE;
+    }
    // If there is no elements, we can't say it's an exact match (an empty map if fundamentally polymorphic).
-    if (entries.isEmpty())
-        return AssignmentTestable.TestResult.WEAKLY_ASSIGNABLE;
-
-    ColumnSpecification keySpec = Maps.keySpecOf(receiver);
-    ColumnSpecification valueSpec = Maps.valueSpecOf(receiver);
+    if (entries.empty()) {
+        return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
+    }
+    auto key_spec = maps::key_spec_of(*receiver);
+    auto value_spec = maps::value_spec_of(*receiver);
    // It's an exact match if all are exact match, but is not assignable as soon as any is non assignable.
-    AssignmentTestable.TestResult res = AssignmentTestable.TestResult.EXACT_MATCH;
-    for (Pair<Term.Raw, Term.Raw> entry : entries)
-    {
-        AssignmentTestable.TestResult t1 = entry.left.testAssignment(keyspace, keySpec);
-        AssignmentTestable.TestResult t2 = entry.right.testAssignment(keyspace, valueSpec);
-        if (t1 == AssignmentTestable.TestResult.NOT_ASSIGNABLE || t2 == AssignmentTestable.TestResult.NOT_ASSIGNABLE)
-            return AssignmentTestable.TestResult.NOT_ASSIGNABLE;
-        if (t1 != AssignmentTestable.TestResult.EXACT_MATCH || t2 != AssignmentTestable.TestResult.EXACT_MATCH)
-            res = AssignmentTestable.TestResult.WEAKLY_ASSIGNABLE;
+    auto res = assignment_testable::test_result::EXACT_MATCH;
+    for (auto entry : entries) {
+        auto t1 = entry.first->test_assignment(db, keyspace, key_spec);
+        auto t2 = entry.second->test_assignment(db, keyspace, value_spec);
+        if (t1 == assignment_testable::test_result::NOT_ASSIGNABLE || t2 == assignment_testable::test_result::NOT_ASSIGNABLE)
+            return assignment_testable::test_result::NOT_ASSIGNABLE;
+        if (t1 != assignment_testable::test_result::EXACT_MATCH || t2 != assignment_testable::test_result::EXACT_MATCH)
+            res = assignment_testable::test_result::WEAKLY_ASSIGNABLE;
    }
    return res;
-#endif
 }

 sstring
@@ -161,7 +157,7 @@ maps::value::from_serialized(bytes_view value, map_type type, serialization_form
        // Collections have this small hack that validate cannot be called on a serialized object,
        // but compose does the validation (so we're fine).
        // FIXME: deserialize_for_native_protocol?!
-        auto m = boost::any_cast<map_type_impl::native_type>(type->deserialize(value, sf));
+        auto m = value_cast<map_type_impl::native_type>(type->deserialize(value, sf));
        std::map<bytes, bytes, serialized_compare> map(type->get_keys_type()->as_less_comparator());
        for (auto&& e : m) {
            map.emplace(type->get_keys_type()->decompose(e.first),
@@ -350,10 +346,8 @@ maps::discarder_by_key::execute(mutation& m, const exploded_clustering_prefix& p
    if (!key) {
        throw exceptions::invalid_request_exception("Invalid null map key");
    }
-    auto ckey = dynamic_pointer_cast<constants::value>(std::move(key));
-    assert(ckey);
    collection_type_impl::mutation mut;
-    mut.cells.emplace_back(*ckey->_bytes, params.make_dead_cell());
+    mut.cells.emplace_back(*key->get(params._options), params.make_dead_cell());
    auto mtype = static_cast<const map_type_impl*>(column.type.get());
    m.set_cell(prefix, column, mtype->serialize_mutation_form(mut));
 }
--- a/cql3/operation.cc
+++ b/cql3/operation.cc
@@ -216,7 +216,7 @@ operation::element_deletion::prepare(database& db, const sstring& keyspace, cons
        return make_shared<lists::discarder_by_index>(receiver, std::move(idx));
    } else if (&ctype->_kind == &collection_type_impl::kind::set) {
        auto&& elt = _element->prepare(db, keyspace, sets::value_spec_of(receiver.column_specification));
-        return make_shared<sets::discarder>(receiver, std::move(elt));
+        return make_shared<sets::element_discarder>(receiver, std::move(elt));
    } else if (&ctype->_kind == &collection_type_impl::kind::map) {
        auto&& key = _element->prepare(db, keyspace, maps::key_spec_of(*receiver.column_specification));
        return make_shared<maps::discarder_by_key>(receiver, std::move(key));
--- a/cql3/operation.hh
+++ b/cql3/operation.hh
@@ -45,6 +45,7 @@
 #include "exceptions/exceptions.hh"
 #include "database_fwd.hh"
 #include "term.hh"
+#include "update_parameters.hh"

 #include <experimental/optional>

@@ -86,6 +87,14 @@ public:

    virtual ~operation() {}

+    atomic_cell make_dead_cell(const update_parameters& params) const {
+        return params.make_dead_cell();
+    }
+
+    atomic_cell make_cell(bytes_view value, const update_parameters& params) const {
+        return params.make_cell(value);
+    }
+
    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const {
        return _t && _t->uses_function(ks_name, function_name);
    }
@@ -190,13 +199,7 @@ public:
        }

        virtual shared_ptr<operation> prepare(database& db, const sstring& keyspace, const column_definition& receiver);
-#if 0
-        protected String toString(ColumnSpecification column)
-        {
-            return String.format("%s[%s] = %s", column.name, selector, value);
-        }

-#endif
        virtual bool is_compatible_with(shared_ptr<raw_update> other) override;
    };

@@ -209,13 +212,6 @@ public:

        virtual shared_ptr<operation> prepare(database& db, const sstring& keyspace, const column_definition& receiver) override;

-#if 0
-        protected String toString(ColumnSpecification column)
-        {
-            return String.format("%s = %s + %s", column.name, column.name, value);
-        }
-#endif
-
        virtual bool is_compatible_with(shared_ptr<raw_update> other) override;
    };

@@ -228,13 +224,6 @@ public:

        virtual shared_ptr<operation> prepare(database& db, const sstring& keyspace, const column_definition& receiver) override;

-#if 0
-        protected String toString(ColumnSpecification column)
-        {
-            return String.format("%s = %s - %s", column.name, column.name, value);
-        }
-#endif
-
        virtual bool is_compatible_with(shared_ptr<raw_update> other) override;
    };

@@ -247,12 +236,6 @@ public:

        virtual shared_ptr<operation> prepare(database& db, const sstring& keyspace, const column_definition& receiver) override;

-#if 0
-        protected String toString(ColumnSpecification column)
-        {
-            return String.format("%s = %s - %s", column.name, value, column.name);
-        }
-#endif
        virtual bool is_compatible_with(shared_ptr<raw_update> other) override;
    };

--- a/cql3/query_processor.cc
+++ b/cql3/query_processor.cc
@@ -61,11 +61,10 @@ distributed<query_processor> _the_query_processor;
 const sstring query_processor::CQL_VERSION = "3.2.0";

 class query_processor::internal_state {
-    service::client_state _cs;
    service::query_state _qs;
 public:
    internal_state()
-            : _cs(service::client_state::internal_tag()), _qs(_cs) {
+            : _qs(service::client_state{service::client_state::internal_tag()}) {
    }
    operator service::query_state&() {
        return _qs;
@@ -74,14 +73,13 @@ public:
        return _qs;
    }
    operator service::client_state&() {
-        return _cs;
+        return _qs.get_client_state();
    }
    operator const service::client_state&() const {
-        return _cs;
+        return _qs.get_client_state();
    }
-
    api::timestamp_type next_timestamp() {
-        return _cs.get_timestamp();
+        return _qs.get_client_state().get_timestamp();
    }
 };

@@ -180,7 +178,7 @@ query_processor::prepare(const std::experimental::string_view& query_string, con
 query_processor::get_stored_prepared_statement(const std::experimental::string_view& query_string, const sstring& keyspace, bool for_thrift)
 {
    if (for_thrift) {
-        throw std::runtime_error("not implemented");
+        throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
 #if 0
        Integer thriftStatementId = computeThriftId(queryString, keyspace);
        ParsedStatement.Prepared existing = thriftPreparedStatements.get(thriftStatementId);
@@ -211,7 +209,7 @@ query_processor::store_prepared_statement(const std::experimental::string_view&
                                                        MAX_CACHE_PREPARED_MEMORY));
 #endif
    if (for_thrift) {
-        throw std::runtime_error("not implemented");
+        throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
 #if 0
        Integer statementId = computeThriftId(queryString, keyspace);
        thriftPreparedStatements.put(statementId, prepared);
@@ -302,7 +300,7 @@ query_processor::parse_statement(const sstring_view& query)

 query_options query_processor::make_internal_options(
        ::shared_ptr<statements::parsed_statement::prepared> p,
-        const std::initializer_list<boost::any>& values) {
+        const std::initializer_list<data_value>& values) {
    if (p->bound_names.size() != values.size()) {
        throw std::invalid_argument(sprint("Invalid number of values. Expecting %d but got %d", p->bound_names.size(), values.size()));
    }
@@ -310,9 +308,9 @@ query_options query_processor::make_internal_options(
    std::vector<bytes_opt> bound_values;
    for (auto& v : values) {
        auto& n = *ni++;
-        if (v.type() == typeid(bytes)) {
-            bound_values.push_back({boost::any_cast<bytes>(v)});
-        } else if (v.empty()) {
+        if (v.type() == bytes_type) {
+            bound_values.push_back({value_cast<bytes>(v)});
+        } else if (v.is_null()) {
            bound_values.push_back({});
        } else {
            bound_values.push_back({n->type->decompose(v)});
@@ -335,7 +333,10 @@ query_options query_processor::make_internal_options(

 future<::shared_ptr<untyped_result_set>> query_processor::execute_internal(
        const std::experimental::string_view& query_string,
-        const std::initializer_list<boost::any>& values) {
+        const std::initializer_list<data_value>& values) {
+    if (log.is_enabled(logging::log_level::trace)) {
+        log.trace("execute_internal: \"{}\" ({})", query_string, ::join(", ", values));
+    }
    auto p = prepare_internal(query_string);
    auto opts = make_internal_options(p, values);
    return do_with(std::move(opts),
--- a/cql3/query_processor.hh
+++ b/cql3/query_processor.hh
@@ -323,12 +323,12 @@ public:
 #endif
 private:
    ::shared_ptr<statements::parsed_statement::prepared> prepare_internal(const std::experimental::string_view& query);
-    query_options make_internal_options(::shared_ptr<statements::parsed_statement::prepared>, const std::initializer_list<boost::any>&);
+    query_options make_internal_options(::shared_ptr<statements::parsed_statement::prepared>, const std::initializer_list<data_value>&);

 public:
    future<::shared_ptr<untyped_result_set>> execute_internal(
            const std::experimental::string_view& query_string,
-            const std::initializer_list<boost::any>& = { });
+            const std::initializer_list<data_value>& = { });

    /*
     * This function provides a timestamp that is guaranteed to be higher than any timestamp
--- a/cql3/restrictions/multi_column_restriction.hh
+++ b/cql3/restrictions/multi_column_restriction.hh
@@ -374,7 +374,7 @@ public:
    }

    virtual std::vector<bytes_opt> bounds(statements::bound b, const query_options& options) const override {
-        throw std::runtime_error("not implemented");
+        throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
 #if 0
        return Composites.toByteBuffers(boundsAsComposites(b, options));
 #endif
--- a/cql3/restrictions/statement_restrictions.cc
+++ b/cql3/restrictions/statement_restrictions.cc
@@ -41,13 +41,13 @@ public:

    ::shared_ptr<primary_key_restrictions<T>> do_merge_to(schema_ptr schema, ::shared_ptr<restriction> restriction) const {
        if (restriction->is_multi_column()) {
-            throw std::runtime_error("not implemented");
+            throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
        }
        return ::make_shared<single_column_primary_key_restrictions<T>>(schema)->merge_to(schema, restriction);
    }
    ::shared_ptr<primary_key_restrictions<T>> merge_to(schema_ptr schema, ::shared_ptr<restriction> restriction) override {
        if (restriction->is_multi_column()) {
-            throw std::runtime_error("not implemented");
+            throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
        }
        if (restriction->is_on_token()) {
            return static_pointer_cast<token_restriction>(restriction);
--- a/cql3/result_set.hh
+++ b/cql3/result_set.hh
@@ -80,7 +80,7 @@ public:

 private:
    const uint32_t _column_count;
-    ::shared_ptr<service::pager::paging_state> _paging_state;
+    ::shared_ptr<const service::pager::paging_state> _paging_state;

 public:
    metadata(std::vector<::shared_ptr<column_specification>> names_)
@@ -88,7 +88,7 @@ public:
    { }

    metadata(flag_enum_set flags, std::vector<::shared_ptr<column_specification>> names_, uint32_t column_count,
-            ::shared_ptr<service::pager::paging_state> paging_state)
+            ::shared_ptr<const service::pager::paging_state> paging_state)
        : _flags(flags)
        , names(std::move(names_))
        , _column_count(column_count)
@@ -121,7 +121,7 @@ private:
    }

 public:
-    void set_has_more_pages(::shared_ptr<service::pager::paging_state> paging_state) {
+    void set_has_more_pages(::shared_ptr<const service::pager::paging_state> paging_state) {
        if (!paging_state) {
            return;
        }
@@ -342,6 +342,10 @@ public:
        std::sort(_rows.begin(), _rows.end(), std::forward<RowComparator>(cmp));
    }

+    metadata& get_metadata() { // non-const overload: hands out a mutable reference to *_metadata
+        return *_metadata;
+    }
+
    const metadata& get_metadata() const {
        return *_metadata;
    }
--- a/cql3/selection/selection.cc
+++ b/cql3/selection/selection.cc
@@ -125,7 +125,7 @@ protected:
        }
    };

-    std::unique_ptr<selectors> new_selectors() {
+    std::unique_ptr<selectors> new_selectors() const override {
        return std::make_unique<simple_selectors>();
    }
 };
@@ -196,7 +196,7 @@ protected:
        }
    };

-    std::unique_ptr<selectors> new_selectors() {
+    std::unique_ptr<selectors> new_selectors() const override  {
        return std::make_unique<selectors_with_processing>(_factories);
    }
 };
@@ -252,7 +252,7 @@ selection::collect_metadata(schema_ptr schema, const std::vector<::shared_ptr<ra
    return r;
 }

-result_set_builder::result_set_builder(selection& s, db_clock::time_point now, serialization_format sf)
+result_set_builder::result_set_builder(const selection& s, db_clock::time_point now, serialization_format sf)
    : _result_set(std::make_unique<result_set>(::make_shared<metadata>(*(s.get_result_metadata()))))
    , _selectors(s.new_selectors())
    , _now(now)
@@ -295,7 +295,7 @@ void result_set_builder::add(const column_definition& def, const query::result_a
    }
 }

-void result_set_builder::add(const column_definition& def, collection_mutation::view c) {
+void result_set_builder::add(const column_definition& def, collection_mutation_view c) {
    auto&& ctype = static_cast<const collection_type_impl*>(def.type.get());
    current->emplace_back(ctype->to_value(c, _serialization_format));
    // timestamps, ttls meaningless for collections
@@ -330,6 +330,98 @@ std::unique_ptr<result_set> result_set_builder::build() {
    return std::move(_result_set);
 }

+result_set_builder::visitor::visitor(
+        cql3::selection::result_set_builder& builder, const schema& s,
+        const selection& selection)
+        : _builder(builder), _schema(s), _selection(selection), _row_count(0) { // keeps the three arguments as members; row count starts at 0
+}
+
+void result_set_builder::visitor::add_value(const column_definition& def,
+        query::result_row_view::iterator_type& i) {
+    // Multi-cell columns and atomic columns are fetched through different
+    // iterator calls; in both cases a missing cell is added as empty.
+    if (def.type->is_multi_cell()) {
+        if (auto cell = i.next_collection_cell()) {
+            _builder.add(def, *cell);
+        } else {
+            _builder.add_empty();
+        }
+    } else {
+        if (auto cell = i.next_atomic_cell()) {
+            _builder.add(def, *cell);
+        } else {
+            _builder.add_empty();
+        }
+    }
+}
+
+void result_set_builder::visitor::accept_new_partition(const partition_key& key,
+        uint32_t row_count) {
+    _partition_key = key.explode(_schema); // kept so later rows can emit their partition-key columns
+    _row_count = row_count; // read back by accept_partition_end()
+}
+
+void result_set_builder::visitor::accept_new_partition(uint32_t row_count) { // key-less overload: _partition_key is not touched
+    _row_count = row_count;
+}
+
+void result_set_builder::visitor::accept_new_row(const clustering_key& key,
+        const query::result_row_view& static_row,
+        const query::result_row_view& row) {
+    _clustering_key = key.explode(_schema); // stored first; the key-less overload reads it for clustering columns
+    accept_new_row(static_row, row); // the row itself is built by the key-less overload
+}
+
+void result_set_builder::visitor::accept_new_row(
+        const query::result_row_view& static_row,
+        const query::result_row_view& row) {
+    // Starts a result row and appends one value per selected column, in selection order.
+    auto static_cells = static_row.iterator();
+    auto regular_cells = row.iterator();
+    _builder.new_row();
+    for (auto&& def : _selection.get_columns()) {
+        switch (def->kind) {
+        case column_kind::partition_key:
+            _builder.add(_partition_key[def->component_index()]);
+            break;
+        case column_kind::clustering_key:
+            // A component missing from the stored clustering key is added as an empty value.
+            if (def->component_index() < _clustering_key.size()) {
+                _builder.add(_clustering_key[def->component_index()]);
+            } else {
+                _builder.add({});
+            }
+            break;
+        case column_kind::regular_column:
+        case column_kind::compact_column:
+            add_value(*def, regular_cells);
+            break;
+        case column_kind::static_column:
+            add_value(*def, static_cells);
+            break;
+        default:
+            assert(0);
+        }
+    }
+}
+
+void result_set_builder::visitor::accept_partition_end(
+        const query::result_row_view& static_row) {
+    // Only a partition announced with zero rows gets a row here: key and static columns filled, the rest empty.
+    if (_row_count != 0) { return; }
+    _builder.new_row();
+    auto static_cells = static_row.iterator();
+    for (auto&& def : _selection.get_columns()) {
+        if (def->is_partition_key()) {
+            _builder.add(_partition_key[def->component_index()]);
+        } else if (def->is_static()) {
+            add_value(*def, static_cells);
+        } else {
+            _builder.add_empty();
+        }
+    }
+}
+
 api::timestamp_type result_set_builder::timestamp_of(size_t idx) {
    return _timestamps[idx];
 }
--- a/cql3/selection/selection.hh
+++ b/cql3/selection/selection.hh
@@ -161,7 +161,7 @@ public:
        return std::find(_columns.begin(), _columns.end(), &def) != _columns.end();
    }

-    ::shared_ptr<metadata> get_result_metadata() {
+    ::shared_ptr<metadata> get_result_metadata() const {
        return _metadata;
    }

@@ -186,16 +186,16 @@ private:
 public:
    static ::shared_ptr<selection> from_selectors(database& db, schema_ptr schema, const std::vector<::shared_ptr<raw_selector>>& raw_selectors);

-    virtual std::unique_ptr<selectors> new_selectors() = 0;
+    virtual std::unique_ptr<selectors> new_selectors() const = 0;

    /**
     * Returns a range of CQL3 columns this selection needs.
     */
-    auto const& get_columns() {
+    auto const& get_columns() const {
        return _columns;
    }

-    uint32_t get_column_count() {
+    uint32_t get_column_count() const {
        return _columns.size();
    }

@@ -238,15 +238,39 @@ private:
    const db_clock::time_point _now;
    serialization_format _serialization_format;
 public:
-    result_set_builder(selection& s, db_clock::time_point now, serialization_format sf);
+    result_set_builder(const selection& s, db_clock::time_point now, serialization_format sf);
    void add_empty();
    void add(bytes_opt value);
    void add(const column_definition& def, const query::result_atomic_cell_view& c);
-    void add(const column_definition& def, collection_mutation::view c);
+    void add(const column_definition& def, collection_mutation_view c);
    void new_row();
    std::unique_ptr<result_set> build();
    api::timestamp_type timestamp_of(size_t idx);
    int32_t ttl_of(size_t idx);
+
+    // Implements ResultVisitor concept from query.hh; forwards what it visits to a result_set_builder
+    class visitor {
+    protected:
+        result_set_builder& _builder; // held by reference, not owned
+        const schema& _schema; // used to explode partition and clustering keys
+        const selection& _selection; // supplies the columns to emit for each row
+        uint32_t _row_count; // row count given to the latest accept_new_partition()
+        std::vector<bytes> _partition_key; // exploded key from the latest keyed accept_new_partition()
+        std::vector<bytes> _clustering_key; // exploded key from the latest keyed accept_new_row()
+    public:
+        visitor(cql3::selection::result_set_builder& builder, const schema& s, const selection&);
+        visitor(visitor&&) = default;
+
+        void add_value(const column_definition& def, query::result_row_view::iterator_type& i); // adds the iterator's next cell, or an empty value
+        void accept_new_partition(const partition_key& key, uint32_t row_count);
+        void accept_new_partition(uint32_t row_count); // key-less variant
+        void accept_new_row(const clustering_key& key,
+                const query::result_row_view& static_row,
+                const query::result_row_view& row);
+        void accept_new_row(const query::result_row_view& static_row,
+                const query::result_row_view& row); // key-less variant
+        void accept_partition_end(const query::result_row_view& static_row); // emits a row only when the partition had zero rows
+    };
 private:
    bytes_opt get_value(data_type t, query::result_atomic_cell_view c);
 };
--- a/cql3/sets.cc
+++ b/cql3/sets.cc
@@ -125,7 +125,7 @@ sets::value::from_serialized(bytes_view v, set_type type, serialization_format s
        // Collections have this small hack that validate cannot be called on a serialized object,
        // but compose does the validation (so we're fine).
        // FIXME: deserializeForNativeProtocol?!
-        auto s = boost::any_cast<set_type_impl::native_type>(type->deserialize(v, sf));
+        auto s = value_cast<set_type_impl::native_type>(type->deserialize(v, sf));
        std::set<bytes, serialized_compare> elements(type->get_elements_type()->as_less_comparator());
        for (auto&& element : s) {
            elements.insert(elements.end(), type->get_elements_type()->decompose(element));
@@ -284,16 +284,11 @@ sets::discarder::execute(mutation& m, const exploded_clustering_prefix& row_key,
    auto kill = [&] (bytes idx) {
        mut.cells.push_back({std::move(idx), params.make_dead_cell()});
    };
-    // This can be either a set or a single element
-    auto cvalue = dynamic_pointer_cast<constants::value>(value);
-    if (cvalue) {
-        kill(cvalue->_bytes ? *cvalue->_bytes : bytes());
-    } else {
-        auto svalue = static_pointer_cast<sets::value>(value);
-        mut.cells.reserve(svalue->_elements.size());
-        for (auto&& e : svalue->_elements) {
-            kill(e);
-        }
+    auto svalue = dynamic_pointer_cast<sets::value>(value);
+    assert(svalue);
+    mut.cells.reserve(svalue->_elements.size());
+    for (auto&& e : svalue->_elements) {
+        kill(e);
    }
    auto ctype = static_pointer_cast<const collection_type_impl>(column.type);
    m.set_cell(row_key, column,
@@ -301,4 +296,17 @@ sets::discarder::execute(mutation& m, const exploded_clustering_prefix& row_key,
                    ctype->serialize_mutation_form(mut)));
 }

+void sets::element_discarder::execute(mutation& m, const exploded_clustering_prefix& row_key, const update_parameters& params) {
+    // Builds a collection mutation holding one dead cell keyed by the bound element and applies it to the column.
+    assert(column.type->is_multi_cell() && "Attempted to remove items from a frozen set");
+    auto element = _t->bind(params._options);
+    if (!element) {
+        throw exceptions::invalid_request_exception("Invalid null set element");
+    }
+    collection_type_impl::mutation removal;
+    removal.cells.emplace_back(*element->get(params._options), params.make_dead_cell());
+    auto set_type = static_pointer_cast<const collection_type_impl>(column.type);
+    m.set_cell(row_key, column, set_type->serialize_mutation_form(removal));
+}
+
 }
--- a/cql3/sets.hh
+++ b/cql3/sets.hh
@@ -133,6 +133,13 @@ public:
        }
        virtual void execute(mutation& m, const exploded_clustering_prefix& row_key, const update_parameters& params) override;
    };
+
+    class element_discarder : public operation { // operation bound to one column and one term; behaviour lives in execute()
+    public:
+        element_discarder(const column_definition& column, shared_ptr<term> t)
+            : operation(column, std::move(t)) { } // both arguments are handed straight to the operation base
+        virtual void execute(mutation& m, const exploded_clustering_prefix& row_key, const update_parameters& params) override;
+    };
 };

 }
--- a/cql3/single_column_relation.hh
+++ b/cql3/single_column_relation.hh
@@ -159,7 +159,7 @@ protected:
    virtual shared_ptr<restrictions::restriction> new_contains_restriction(database& db, schema_ptr schema,
                                                 ::shared_ptr<variable_specifications> bound_names,
                                                 bool is_key) override {
-        throw std::runtime_error("not implemented");
+        throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
 #if 0
        ColumnDefinition columnDef = toColumnDefinition(schema, entity);
        Term term = toTerm(toReceivers(schema, columnDef), value, schema.ksName, bound_names);
--- a/cql3/statements/batch_statement.hh
+++ b/cql3/statements/batch_statement.hh
@@ -322,7 +322,7 @@ public:
    virtual future<shared_ptr<transport::messages::result_message>> execute_internal(
            distributed<service::storage_proxy>& proxy,
            service::query_state& query_state, const query_options& options) override {
-        throw "not implemented";
+        throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
 #if 0
        assert !hasConditions;
        for (IMutation mutation : getMutations(BatchQueryOptions.withoutPerStatementVariables(options), true, queryState.getTimestamp()))
--- a/cql3/statements/delete_statement.cc
+++ b/cql3/statements/delete_statement.cc
@@ -45,6 +45,14 @@ namespace cql3 {

 namespace statements {

+delete_statement::delete_statement(statement_type type, uint32_t bound_terms, schema_ptr s, std::unique_ptr<attributes> attrs)
+        : modification_statement{type, bound_terms, std::move(s), std::move(attrs)} // no state of its own: everything goes to the base class
+{ }
+
+bool delete_statement::require_full_clustering_key() const { // always false for delete_statement
+    return false;
+}
+
 void delete_statement::add_update_for_key(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) {
    if (_column_operations.empty()) {
        m.partition().apply_delete(*s, prefix, params.make_tombstone());
@@ -96,5 +104,17 @@ delete_statement::parsed::prepare_internal(database& db, schema_ptr schema, ::sh
    return stmt;
 }

+delete_statement::parsed::parsed(::shared_ptr<cf_name> name,
+                                 ::shared_ptr<attributes::raw> attrs,
+                                 std::vector<::shared_ptr<operation::raw_deletion>> deletions,
+                                 std::vector<::shared_ptr<relation>> where_clause,
+                                 conditions_vector conditions,
+                                 bool if_exists)
+    : modification_statement::parsed(std::move(name), std::move(attrs), std::move(conditions), false, if_exists) // the literal false is the base's if_not_exists argument
+    , _deletions(std::move(deletions))
+    , _where_clause(std::move(where_clause))
+{ }
+
 }
+
 }
--- a/cql3/statements/delete_statement.hh
+++ b/cql3/statements/delete_statement.hh
@@ -55,13 +55,9 @@ namespace statements {
 */
 class delete_statement : public modification_statement {
 public:
-    delete_statement(statement_type type, uint32_t bound_terms, schema_ptr s, std::unique_ptr<attributes> attrs)
-            : modification_statement{type, bound_terms, std::move(s), std::move(attrs)}
-    { }
+    delete_statement(statement_type type, uint32_t bound_terms, schema_ptr s, std::unique_ptr<attributes> attrs);

-    virtual bool require_full_clustering_key() const override {
-        return false;
-    }
+    virtual bool require_full_clustering_key() const override;

    virtual void add_update_for_key(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) override;

@@ -94,11 +90,7 @@ public:
               std::vector<::shared_ptr<operation::raw_deletion>> deletions,
               std::vector<::shared_ptr<relation>> where_clause,
               conditions_vector conditions,
-               bool if_exists)
-            : modification_statement::parsed(std::move(name), std::move(attrs), std::move(conditions), false, if_exists)
-            , _deletions(std::move(deletions))
-            , _where_clause(std::move(where_clause))
-        { }
+               bool if_exists);
    protected:
        virtual ::shared_ptr<modification_statement> prepare_internal(database& db, schema_ptr schema,
            ::shared_ptr<variable_specifications> bound_names, std::unique_ptr<attributes> attrs);
--- a/cql3/statements/modification_statement.cc
+++ b/cql3/statements/modification_statement.cc
@@ -71,6 +71,81 @@ operator<<(std::ostream& out, modification_statement::statement_type t) {
    return out;
 }

+modification_statement::modification_statement(statement_type type_, uint32_t bound_terms, schema_ptr schema_, std::unique_ptr<attributes> attrs_)
+    : type{type_}
+    , _bound_terms{bound_terms}
+    , s{schema_} // copied, not moved, from the by-value parameter
+    , attrs{std::move(attrs_)} // takes ownership of the attributes
+    , _column_operations{} // starts empty; add_operation() appends to it
+{ }
+
+bool modification_statement::uses_function(const sstring& ks_name, const sstring& function_name) const {
+    // Shared test: a null entry never matches, a non-null one is asked directly.
+    auto refers_to_it = [&] (auto&& candidate) {
+        return candidate && candidate->uses_function(ks_name, function_name);
+    };
+    // The statement attributes are consulted first.
+    if (attrs->uses_function(ks_name, function_name)) {
+        return true;
+    }
+    // Next, the mapped values of _processed_keys.
+    for (auto&& entry : _processed_keys) {
+        if (refers_to_it(entry.second)) {
+            return true;
+        }
+    }
+    // Finally column operations, column conditions and static conditions, in that order.
+    if (std::any_of(_column_operations.begin(), _column_operations.end(), refers_to_it)) {
+        return true;
+    }
+    if (std::any_of(_column_conditions.begin(), _column_conditions.end(), refers_to_it)) {
+        return true;
+    }
+    if (std::any_of(_static_conditions.begin(), _static_conditions.end(), refers_to_it)) {
+        return true;
+    }
+    return false;
+}
+
+uint32_t modification_statement::get_bound_terms() { // the bound_terms value stored by the constructor
+    return _bound_terms;
+}
+
+sstring modification_statement::keyspace() const { // keyspace name as reported by the statement's schema
+    return s->ks_name();
+}
+
+sstring modification_statement::column_family() const { // column family name as reported by the statement's schema
+    return s->cf_name();
+}
+
+bool modification_statement::is_counter() const { // forwards to the schema's is_counter()
+    return s->is_counter();
+}
+
+int64_t modification_statement::get_timestamp(int64_t now, const query_options& options) const { // forwards both arguments to the statement attributes
+    return attrs->get_timestamp(now, options);
+}
+
+bool modification_statement::is_timestamp_set() const { // forwards to the statement attributes
+    return attrs->is_timestamp_set();
+}
+
+gc_clock::duration modification_statement::get_time_to_live(const query_options& options) const { // wraps the attributes' TTL value in a gc_clock::duration
+    return gc_clock::duration(attrs->get_time_to_live(options));
+}
+
+void modification_statement::check_access(const service::client_state& state) {
+    warn(unimplemented::cause::PERMISSIONS); // the only active statement: no access check is performed on 'state'
+#if 0 // disabled reference code below is never compiled
+    state.hasColumnFamilyAccess(keyspace(), columnFamily(), Permission.MODIFY);
+
+    // CAS updates can be used to simulate a SELECT query, so should require Permission.SELECT as well.
+    if (hasConditions())
+        state.hasColumnFamilyAccess(keyspace(), columnFamily(), Permission.SELECT);
+#endif
+}
+
 future<std::vector<mutation>>
 modification_statement::get_mutations(distributed<service::storage_proxy>& proxy, const query_options& options, bool local, int64_t now) {
    auto keys = make_lw_shared(build_partition_keys(options));
@@ -130,9 +205,9 @@ public:
                    const query::result_row_view& row) {
        update_parameters::prefetch_data::row cells;

-        auto add_cell = [&cells] (column_id id, std::experimental::optional<collection_mutation::view>&& cell) {
+        auto add_cell = [&cells] (column_id id, std::experimental::optional<collection_mutation_view>&& cell) {
            if (cell) {
-                cells.emplace(id, collection_mutation::one{to_bytes(cell->data)});
+                cells.emplace(id, collection_mutation{to_bytes(cell->data)});
            }
        };

@@ -549,6 +624,63 @@ bool modification_statement::depends_on_column_family(const sstring& cf_name) co
    return column_family() == cf_name;
 }

+void modification_statement::add_operation(::shared_ptr<operation> op) {
+    if (op->column.is_static()) { // record which kind of column this operation targets
+        _sets_static_columns = true;
+    } else {
+        _sets_regular_columns = true;
+    }
+    _column_operations.push_back(std::move(op)); // op is moved from here, so the flags above must be set first
+}
+
+void modification_statement::add_condition(::shared_ptr<column_condition> cond) {
+    if (cond->column.is_static()) { // conditions are kept in two lists, split by column kind
+        _sets_static_columns = true; // a condition raises the same flag that add_operation() raises
+        _static_conditions.emplace_back(std::move(cond));
+    } else {
+        _sets_regular_columns = true;
+        _column_conditions.emplace_back(std::move(cond));
+    }
+}
+
+void modification_statement::set_if_not_exist_condition() { // one-way switch: the flag is only ever set to true here
+    _if_not_exists = true;
+}
+
+bool modification_statement::has_if_not_exist_condition() const { // reports the flag raised by set_if_not_exist_condition()
+    return _if_not_exists;
+}
+
+void modification_statement::set_if_exist_condition() { // one-way switch: the flag is only ever set to true here
+    _if_exists = true;
+}
+
+bool modification_statement::has_if_exist_condition() const { // reports the flag raised by set_if_exist_condition()
+    return _if_exists;
+}
+
+bool modification_statement::requires_read() { // true as soon as one column operation reports requires_read()
+    return std::any_of(_column_operations.begin(), _column_operations.end(), [] (auto&& op) {
+        return op->requires_read();
+    });
+}
+
+bool modification_statement::has_conditions() { // either existence flag, or any column/static condition, counts
+    return _if_not_exists || _if_exists || !_column_conditions.empty() || !_static_conditions.empty();
+}
+
+void modification_statement::validate_where_clause_for_conditions() {
+    // No-op by default; the function is declared virtual in the header.
+}
+
+modification_statement::parsed::parsed(::shared_ptr<cf_name> name, ::shared_ptr<attributes::raw> attrs, conditions_vector conditions, bool if_not_exists, bool if_exists)
+    : cf_statement{std::move(name)} // the column family name is handed to the cf_statement base
+    , _attrs{std::move(attrs)}
+    , _conditions{std::move(conditions)}
+    , _if_not_exists{if_not_exists} // both flags are const members, fixed at construction
+    , _if_exists{if_exists}
+{ }
+
 }

 }
--- a/cql3/statements/modification_statement.hh
+++ b/cql3/statements/modification_statement.hh
@@ -107,84 +107,29 @@ private:
        };

 public:
-    modification_statement(statement_type type_, uint32_t bound_terms, schema_ptr schema_, std::unique_ptr<attributes> attrs_)
-        : type{type_}
-        , _bound_terms{bound_terms}
-        , s{schema_}
-        , attrs{std::move(attrs_)}
-        , _column_operations{}
-    { }
+    modification_statement(statement_type type_, uint32_t bound_terms, schema_ptr schema_, std::unique_ptr<attributes> attrs_);

-    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override {
-        if (attrs->uses_function(ks_name, function_name)) {
-            return true;
-        }
-        for (auto&& e : _processed_keys) {
-            auto r = e.second;
-            if (r && r->uses_function(ks_name, function_name)) {
-                return true;
-            }
-        }
-        for (auto&& operation : _column_operations) {
-            if (operation && operation->uses_function(ks_name, function_name)) {
-                return true;
-            }
-        }
-        for (auto&& condition : _column_conditions) {
-            if (condition && condition->uses_function(ks_name, function_name)) {
-                return true;
-            }
-        }
-        for (auto&& condition : _static_conditions) {
-            if (condition && condition->uses_function(ks_name, function_name)) {
-                return true;
-            }
-        }
-        return false;
-    }
+    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const override;

    virtual bool require_full_clustering_key() const = 0;

    virtual void add_update_for_key(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) = 0;

-    virtual uint32_t get_bound_terms() override {
-        return _bound_terms;
-    }
+    virtual uint32_t get_bound_terms() override;

-    virtual sstring keyspace() const {
-        return s->ks_name();
-    }
+    virtual sstring keyspace() const;

-    virtual sstring column_family() const {
-        return s->cf_name();
-    }
+    virtual sstring column_family() const;

-    virtual bool is_counter() const {
-        return s->is_counter();
-    }
+    virtual bool is_counter() const;

-    int64_t get_timestamp(int64_t now, const query_options& options) const {
-        return attrs->get_timestamp(now, options);
-    }
+    int64_t get_timestamp(int64_t now, const query_options& options) const;

-    bool is_timestamp_set() const {
-        return attrs->is_timestamp_set();
-    }
+    bool is_timestamp_set() const;

-    gc_clock::duration get_time_to_live(const query_options& options) const {
-        return gc_clock::duration(attrs->get_time_to_live(options));
-    }
+    gc_clock::duration get_time_to_live(const query_options& options) const;

-    virtual void check_access(const service::client_state& state) override {
-        warn(unimplemented::cause::PERMISSIONS);
-#if 0
-        state.hasColumnFamilyAccess(keyspace(), columnFamily(), Permission.MODIFY);
-
-        // CAS updates can be used to simulate a SELECT query, so should require Permission.SELECT as well.
-        if (hasConditions())
-            state.hasColumnFamilyAccess(keyspace(), columnFamily(), Permission.SELECT);
-#endif
-    }
+    virtual void check_access(const service::client_state& state) override;

    void validate(distributed<service::storage_proxy>&, const service::client_state& state) override;

@@ -192,14 +137,7 @@ public:

    virtual bool depends_on_column_family(const sstring& cf_name) const override;

-    void add_operation(::shared_ptr<operation> op) {
-        if (op->column.is_static()) {
-            _sets_static_columns = true;
-        } else {
-            _sets_regular_columns = true;
-        }
-        _column_operations.push_back(std::move(op));
-    }
+    void add_operation(::shared_ptr<operation> op);

 #if 0
    public Iterable<ColumnDefinition> getColumnsWithConditions()
@@ -212,31 +150,15 @@ public:
    }
 #endif
 public:
-    void add_condition(::shared_ptr<column_condition> cond) {
-        if (cond->column.is_static()) {
-            _sets_static_columns = true;
-            _static_conditions.emplace_back(std::move(cond));
-        } else {
-            _sets_regular_columns = true;
-            _column_conditions.emplace_back(std::move(cond));
-        }
-    }
+    void add_condition(::shared_ptr<column_condition> cond);

-    void set_if_not_exist_condition() {
-        _if_not_exists = true;
-    }
+    void set_if_not_exist_condition();

-    bool has_if_not_exist_condition() const {
-        return _if_not_exists;
-    }
+    bool has_if_not_exist_condition() const;

-    void set_if_exist_condition() {
-        _if_exists = true;
-    }
+    void set_if_exist_condition();

-    bool has_if_exist_condition() const {
-        return _if_exists;
-    }
+    bool has_if_exist_condition() const;

 private:
    void add_key_values(const column_definition& def, ::shared_ptr<restrictions::restriction> values);
@@ -254,11 +176,7 @@ protected:
    const column_definition* get_first_empty_key();

 public:
-    bool requires_read() {
-        return std::any_of(_column_operations.begin(), _column_operations.end(), [] (auto&& op) {
-            return op->requires_read();
-        });
-    }
+    bool requires_read();

 protected:
    future<update_parameters::prefetched_rows_type> read_required_rows(
@@ -269,9 +187,7 @@ protected:
                db::consistency_level cl);

 public:
-    bool has_conditions() {
-        return _if_not_exists || _if_exists || !_column_conditions.empty() || !_static_conditions.empty();
-    }
+    bool has_conditions();

    virtual future<::shared_ptr<transport::messages::result_message>>
    execute(distributed<service::storage_proxy>& proxy, service::query_state& qs, const query_options& options) override;
@@ -428,9 +344,7 @@ protected:
     * processed to check that they are compatible.
     * @throws InvalidRequestException
     */
-    virtual void validate_where_clause_for_conditions() {
-        //  no-op by default
-    }
+    virtual void validate_where_clause_for_conditions();

 public:
    class parsed : public cf_statement {
@@ -443,13 +357,7 @@ public:
        const bool _if_not_exists;
        const bool _if_exists;
    protected:
-        parsed(::shared_ptr<cf_name> name, ::shared_ptr<attributes::raw> attrs, conditions_vector conditions, bool if_not_exists, bool if_exists)
-            : cf_statement{std::move(name)}
-            , _attrs{std::move(attrs)}
-            , _conditions{std::move(conditions)}
-            , _if_not_exists{if_not_exists}
-            , _if_exists{if_exists}
-        { }
+        parsed(::shared_ptr<cf_name> name, ::shared_ptr<attributes::raw> attrs, conditions_vector conditions, bool if_not_exists, bool if_exists);

    public:
        virtual ::shared_ptr<parsed_statement::prepared> prepare(database& db) override;
--- a/cql3/statements/parsed_statement.cc
+++ b/cql3/statements/parsed_statement.cc
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright 2014 Cloudius Systems
+ *
+ * Modified by Cloudius Systems
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cql3/statements/parsed_statement.hh"
+
+namespace cql3 {
+
+namespace statements {
+
+// Defined out of line; the header only declares the virtual destructor.
+parsed_statement::~parsed_statement() = default;
+
+shared_ptr<variable_specifications> parsed_statement::get_bound_variables() {
+    return _variables;  // copies the shared pointer; the specifications object itself is shared, not cloned
+}
+
+// Used by the parser and preparable statements; replaces _variables with a new variable_specifications built from bound_names.
+void parsed_statement::set_bound_variables(const std::vector<::shared_ptr<column_identifier>>& bound_names) {
+    _variables = ::make_shared<variable_specifications>(bound_names);
+}
+
+bool parsed_statement::uses_function(const sstring& ks_name, const sstring& function_name) const {
+    return false;  // base implementation ignores both arguments; declared virtual in the header so subclasses can override
+}
+
+parsed_statement::prepared::prepared(::shared_ptr<cql_statement> statement_, std::vector<::shared_ptr<column_specification>> bound_names_)
+    : statement(std::move(statement_))      // both members are declared const, so they can only be set here
+    , bound_names(std::move(bound_names_))  // by-value parameter is moved, not copied
+{ }
+
+// Delegates to the (statement, bound_names) constructor; statement_ is a by-value sink, so it is moved on instead of copied.
+parsed_statement::prepared::prepared(::shared_ptr<cql_statement> statement_, const variable_specifications& names)
+    : prepared(std::move(statement_), names.get_specifications()) { }
+
+// Rvalue overload: calls get_specifications() on the moved `names`; statement_ is moved into the delegate instead of copied.
+parsed_statement::prepared::prepared(::shared_ptr<cql_statement> statement_, variable_specifications&& names)
+    : prepared(std::move(statement_), std::move(names).get_specifications()) { }
+
+// Statement-only overload: bound_names is left empty. statement_ is an rvalue reference, so it must be std::move'd to avoid a copy.
+parsed_statement::prepared::prepared(::shared_ptr<cql_statement>&& statement_)
+    : prepared(std::move(statement_), std::vector<::shared_ptr<column_specification>>()) { }
+
+}
+
+}
--- a/cql3/statements/parsed_statement.hh
+++ b/cql3/statements/parsed_statement.hh
@@ -60,47 +60,29 @@ private:
    ::shared_ptr<variable_specifications> _variables;

 public:
-    virtual ~parsed_statement()
-    { }
+    virtual ~parsed_statement();

-    shared_ptr<variable_specifications> get_bound_variables() {
-        return _variables;
-    }
+    shared_ptr<variable_specifications> get_bound_variables();

-    // Used by the parser and preparable statement
-    void set_bound_variables(const std::vector<::shared_ptr<column_identifier>>& bound_names)
-    {
-        _variables = ::make_shared<variable_specifications>(bound_names);
-    }
+    void set_bound_variables(const std::vector<::shared_ptr<column_identifier>>& bound_names);

    class prepared {
    public:
        const ::shared_ptr<cql_statement> statement;
        const std::vector<::shared_ptr<column_specification>> bound_names;

-        prepared(::shared_ptr<cql_statement> statement_, std::vector<::shared_ptr<column_specification>> bound_names_)
-            : statement(std::move(statement_))
-            , bound_names(std::move(bound_names_))
-        { }
+        prepared(::shared_ptr<cql_statement> statement_, std::vector<::shared_ptr<column_specification>> bound_names_);

-        prepared(::shared_ptr<cql_statement> statement_, const variable_specifications& names)
-            : prepared(statement_, names.get_specifications())
-        { }
+        prepared(::shared_ptr<cql_statement> statement_, const variable_specifications& names);

-        prepared(::shared_ptr<cql_statement> statement_, variable_specifications&& names)
-            : prepared(statement_, std::move(names).get_specifications())
-        { }
+        prepared(::shared_ptr<cql_statement> statement_, variable_specifications&& names);

-        prepared(::shared_ptr<cql_statement>&& statement_)
-            : prepared(statement_, std::vector<::shared_ptr<column_specification>>())
-        { }
+        prepared(::shared_ptr<cql_statement>&& statement_);
    };

    virtual ::shared_ptr<prepared> prepare(database& db) = 0;

-    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const {
-        return false;
-    }
+    virtual bool uses_function(const sstring& ks_name, const sstring& function_name) const;
 };

 }
--- a/cql3/statements/property_definitions.cc
+++ b/cql3/statements/property_definitions.cc
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright 2015 Cloudius Systems
+ *
+ * Modified by Cloudius Systems
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cql3/statements/property_definitions.hh"
+
+namespace cql3 {
+
+namespace statements {
+
+// Nothing to set up beyond the members' own default construction, which
+// leaves _properties empty.
+property_definitions::property_definitions() = default;
+
+void property_definitions::add_property(const sstring& name, sstring value) {
+    // A property may be defined only once; `value` is a by-value sink, so move it into the map instead of copying.
+    if (_properties.count(name) != 0) {
+        throw exceptions::syntax_exception(sprint("Multiple definition for property '%s'", name));
+    }
+    _properties.emplace(name, std::move(value));
+}
+
+void property_definitions::add_property(const sstring& name, const std::map<sstring, sstring>& value) {
+    // Map-valued overload: a property name may be defined only once, otherwise a syntax error is raised.
+    if (_properties.count(name) != 0) {
+        throw exceptions::syntax_exception(sprint("Multiple definition for property '%s'", name));
+    }
+    _properties.emplace(name, value);
+}
+
+void property_definitions::validate(const std::set<sstring>& keywords, const std::set<sstring>& obsolete) {
+    // Each defined property must be a known keyword or an obsolete one; anything else is a syntax error.
+    for (auto&& entry : _properties) {
+        const auto& prop = entry.first;
+        if (keywords.count(prop) != 0) {
+            continue;
+        }
+        if (obsolete.count(prop) == 0) {
+            throw exceptions::syntax_exception(sprint("Unknown property '%s'", prop));
+        }
+#if 0
+        logger.warn("Ignoring obsolete property {}", prop);
+#endif
+    }
+}
+
+std::experimental::optional<sstring> property_definitions::get_simple(const sstring& name) const {
+    auto found = _properties.find(name);
+    if (found == _properties.end()) {
+        return std::experimental::nullopt;  // property was never defined
+    }
+    const sstring* text = boost::any_cast<sstring>(&found->second);  // pointer form: null instead of bad_any_cast
+    if (text == nullptr) {
+        throw exceptions::syntax_exception(sprint("Invalid value for property '%s'. It should be a string", name));
+    }
+    return *text;
+}
+
+std::experimental::optional<std::map<sstring, sstring>> property_definitions::get_map(const sstring& name) const {
+    auto found = _properties.find(name);
+    if (found == _properties.end()) {
+        return std::experimental::nullopt;  // property was never defined
+    }
+    const auto* entries = boost::any_cast<std::map<sstring, sstring>>(&found->second);  // null when the stored value is not a map
+    if (entries == nullptr) {
+        throw exceptions::syntax_exception(sprint("Invalid value for property '%s'. It should be a map.", name));
+    }
+    return *entries;
+}
+
+bool property_definitions::has_property(const sstring& name) const {
+    return _properties.count(name) != 0;  // unordered_map keys are unique, so count() is 0 or 1
+}
+
+sstring property_definitions::get_string(sstring key, sstring default_value) const {
+    // Undefined property -> default_value; a non-string value makes get_simple() throw a syntax_exception.
+    auto found = get_simple(key);
+    if (!found) {
+        return default_value;
+    }
+    return *found;
+}
+
+// Boolean view of a property: "1"/"true"/"yes" (any letter case) -> true, other strings -> false, undefined -> default_value.
+bool property_definitions::get_boolean(sstring key, bool default_value) const {
+    auto value = get_simple(key);
+    if (!value) {
+        return default_value;
+    }
+    std::string s{value.value()};
+    // Go through unsigned char: handing a negative char to tolower() is undefined behaviour.
+    std::transform(s.begin(), s.end(), s.begin(), [] (unsigned char c) { return static_cast<char>(::tolower(c)); });
+    return s == "1" || s == "true" || s == "yes";
+}
+
+// Return a property value, typed as a double. Parsing and the undefined
+// case are both handled by to_double().
+double property_definitions::get_double(sstring key, double default_value) const {
+    return to_double(key, get_simple(key), default_value);
+}
+
+double property_definitions::to_double(sstring key, std::experimental::optional<sstring> value, double default_value) {
+    // No value -> default_value; otherwise parse with std::stod, turning any failure into a syntax error.
+    if (!value) {
+        return default_value;
+    }
+    auto text = *value;
+    try {
+        return std::stod(text);
+    } catch (const std::exception&) {
+        throw exceptions::syntax_exception(sprint("Invalid double value %s for '%s'", text, key));
+    }
+}
+
+// Return a property value, typed as an Integer. Parsing and the undefined
+// case are both handled by to_int().
+int32_t property_definitions::get_int(sstring key, int32_t default_value) const {
+    return to_int(key, get_simple(key), default_value);
+}
+
+int32_t property_definitions::to_int(sstring key, std::experimental::optional<sstring> value, int32_t default_value) {
+    // No value -> default_value; otherwise parse with std::stoi, turning any failure into a syntax error.
+    if (!value) {
+        return default_value;
+    }
+    auto text = *value;
+    try {
+        return std::stoi(text);
+    } catch (const std::exception&) {
+        throw exceptions::syntax_exception(sprint("Invalid integer value %s for '%s'", text, key));
+    }
+}
+
+long property_definitions::to_long(sstring key, std::experimental::optional<sstring> value, long default_value) {
+    // No value -> default_value; otherwise parse with std::stol, turning any failure into a syntax error.
+    if (!value) {
+        return default_value;
+    }
+    auto text = *value;
+    try {
+        return std::stol(text);
+    } catch (const std::exception&) {
+        throw exceptions::syntax_exception(sprint("Invalid long value %s for '%s'", text, key));
+    }
+}
+
+}
+
+}
--- a/cql3/statements/property_definitions.hh
+++ b/cql3/statements/property_definitions.hh
@@ -66,141 +66,38 @@ protected:
 #endif
    std::unordered_map<sstring, boost::any> _properties;

-    property_definitions()
-        : _properties{}
-    { }
+    property_definitions();
 public:
-    void add_property(const sstring& name, sstring value) {
-        auto it = _properties.find(name);
-        if (it != _properties.end()) {
-            throw exceptions::syntax_exception(sprint("Multiple definition for property '%s'", name));
-        }
-        _properties.emplace(name, value);
-    }
+    void add_property(const sstring& name, sstring value);

-    void add_property(const sstring& name, const std::map<sstring, sstring>& value) {
-        auto it = _properties.find(name);
-        if (it != _properties.end()) {
-            throw exceptions::syntax_exception(sprint("Multiple definition for property '%s'", name));
-        }
-        _properties.emplace(name, value);
-    }
+    void add_property(const sstring& name, const std::map<sstring, sstring>& value);
+
+    void validate(const std::set<sstring>& keywords, const std::set<sstring>& obsolete);

-    void validate(const std::set<sstring>& keywords, const std::set<sstring>& obsolete) {
-        for (auto&& kv : _properties) {
-            auto&& name = kv.first;
-            if (keywords.count(name)) {
-                continue;
-            }
-            if (obsolete.count(name)) {
-#if 0
-                logger.warn("Ignoring obsolete property {}", name);
-#endif
-            } else {
-                throw exceptions::syntax_exception(sprint("Unknown property '%s'", name));
-            }
-        }
-    }
 protected:
-    std::experimental::optional<sstring> get_simple(const sstring& name) const {
-        auto it = _properties.find(name);
-        if (it == _properties.end()) {
-            return std::experimental::nullopt;
-        }
-        try {
-            return boost::any_cast<sstring>(it->second);
-        } catch (const boost::bad_any_cast& e) {
-            throw exceptions::syntax_exception(sprint("Invalid value for property '%s'. It should be a string", name));
-        }
-    }
+    std::experimental::optional<sstring> get_simple(const sstring& name) const;
+
+    std::experimental::optional<std::map<sstring, sstring>> get_map(const sstring& name) const;

-    std::experimental::optional<std::map<sstring, sstring>> get_map(const sstring& name) const {
-        auto it = _properties.find(name);
-        if (it == _properties.end()) {
-            return std::experimental::nullopt;
-        }
-        try {
-            return boost::any_cast<std::map<sstring, sstring>>(it->second);
-        } catch (const boost::bad_any_cast& e) {
-            throw exceptions::syntax_exception(sprint("Invalid value for property '%s'. It should be a map.", name));
-        }
-    }
 public:
-    bool has_property(const sstring& name) const {
-        return _properties.find(name) != _properties.end();
-    }
+    bool has_property(const sstring& name) const;

-    sstring get_string(sstring key, sstring default_value) const {
-        auto value = get_simple(key);
-        if (value) {
-            return value.value();
-        } else {
-            return default_value;
-        }
-    }
+    sstring get_string(sstring key, sstring default_value) const;

    // Return a property value, typed as a Boolean
-    bool get_boolean(sstring key, bool default_value) const {
-        auto value = get_simple(key);
-        if (value) {
-            std::string s{value.value()};
-            std::transform(s.begin(), s.end(), s.begin(), ::tolower);
-            return s == "1" || s == "true" || s == "yes";
-        } else {
-            return default_value;
-        }
-    }
+    bool get_boolean(sstring key, bool default_value) const;

    // Return a property value, typed as a double
-    double get_double(sstring key, double default_value) const {
-        auto value = get_simple(key);
-        return to_double(key, value, default_value);
-    }
+    double get_double(sstring key, double default_value) const;

-    static double to_double(sstring key, std::experimental::optional<sstring> value, double default_value) {
-        if (value) {
-            auto val = value.value();
-            try {
-                return std::stod(val);
-            } catch (const std::exception& e) {
-                throw exceptions::syntax_exception(sprint("Invalid double value %s for '%s'", val, key));
-            }
-        } else {
-            return default_value;
-        }
-    }
+    static double to_double(sstring key, std::experimental::optional<sstring> value, double default_value);

    // Return a property value, typed as an Integer
-    int32_t get_int(sstring key, int32_t default_value) const {
-        auto value = get_simple(key);
-        return to_int(key, value, default_value);
-    }
+    int32_t get_int(sstring key, int32_t default_value) const;

-    static int32_t to_int(sstring key, std::experimental::optional<sstring> value, int32_t default_value) {
-        if (value) {
-            auto val = value.value();
-            try {
-                return std::stoi(val);
-            } catch (const std::exception& e) {
-                throw exceptions::syntax_exception(sprint("Invalid integer value %s for '%s'", val, key));
-            }
-        } else {
-            return default_value;
-        }
-    }
+    static int32_t to_int(sstring key, std::experimental::optional<sstring> value, int32_t default_value);

-    static long to_long(sstring key, std::experimental::optional<sstring> value, long default_value) {
-        if (value) {
-            auto val = value.value();
-            try {
-                return std::stol(val);
-            } catch (const std::exception& e) {
-                throw exceptions::syntax_exception(sprint("Invalid long value %s for '%s'", val, key));
-            }
-        } else {
-            return default_value;
-        }
-    }
+    static long to_long(sstring key, std::experimental::optional<sstring> value, long default_value);
 };

 }
--- a/cql3/statements/select_statement.cc
+++ b/cql3/statements/select_statement.cc
@@ -46,6 +46,7 @@
 #include "core/shared_ptr.hh"
 #include "query-result-reader.hh"
 #include "query_result_merger.hh"
+#include "service/pager/query_pagers.hh"

 namespace cql3 {

@@ -53,6 +54,31 @@ namespace statements {

 thread_local const shared_ptr<select_statement::parameters> select_statement::_default_parameters = ::make_shared<select_statement::parameters>();

+select_statement::parameters::parameters()  // defaults: both flags false; _orderings is not listed, so it is default-constructed
+    : _is_distinct{false}
+    , _allow_filtering{false}
+{ }
+
+select_statement::parameters::parameters(orderings_type orderings,
+    bool is_distinct,
+    bool allow_filtering)
+    : _orderings{std::move(orderings)}  // by-value parameter, moved into the member
+    , _is_distinct{is_distinct}
+    , _allow_filtering{allow_filtering}
+{ }
+
+bool select_statement::parameters::is_distinct() {
+    return _is_distinct;  // set once by the constructor; the member is declared const
+}
+
+bool select_statement::parameters::allow_filtering() {
+    return _allow_filtering;  // set once by the constructor; the member is declared const
+}
+
+select_statement::parameters::orderings_type const& select_statement::parameters::orderings() {
+    return _orderings;  // handed out by const reference, so no copy is made here
+}
+
 select_statement::select_statement(schema_ptr schema,
    uint32_t bound_terms,
    ::shared_ptr<parameters> parameters,
@@ -114,6 +140,14 @@ bool select_statement::depends_on_column_family(const sstring& cf_name) const {
    return column_family() == cf_name;
 }

+const sstring& select_statement::keyspace() const {
+    return _schema->ks_name();  // keyspace name as reported by the schema this statement holds
+}
+
+const sstring& select_statement::column_family() const {
+    return _schema->cf_name();  // column family name as reported by the schema this statement holds
+}
+
 query::partition_slice
 select_statement::make_partition_slice(const query_options& options) {
    std::vector<column_id> static_columns;
@@ -160,7 +194,7 @@ int32_t select_statement::get_limit(const query_options& options) const {

    try {
        int32_type->validate(*val);
-        auto l = boost::any_cast<int32_t>(int32_type->deserialize(*val));
+        auto l = value_cast<int32_t>(int32_type->deserialize(*val));
        if (l <= 0) {
            throw exceptions::invalid_request_exception("LIMIT must be strictly positive");
        }
@@ -195,37 +229,51 @@ select_statement::execute(distributed<service::storage_proxy>& proxy, service::q
        page_size = DEFAULT_COUNT_PAGE_SIZE;
    }

-    warn(unimplemented::cause::PAGING);
-    return execute(proxy, command, _restrictions->get_partition_key_ranges(options), state, options, now);
+    auto key_ranges = _restrictions->get_partition_key_ranges(options);

-#if 0
-    if (page_size <= 0 || !command || !query_pagers::may_need_paging(command, page_size)) {
-        return execute(proxy, command, state, options, now);
+    if (page_size <= 0
+            || !service::pager::query_pagers::may_need_paging(page_size,
+                    *command, key_ranges)) {
+        return execute(proxy, command, std::move(key_ranges), state, options,
+                now);
    }

-    auto pager = query_pagers::pager(command, cl, state.get_client_state(), options.get_paging_state());
+    auto p = service::pager::query_pagers::pager(_schema, _selection,
+            state, options, command, std::move(key_ranges));

-    if (selection->isAggregate()) {
-        return page_aggregate_query(pager, options, page_size, now);
+    if (_selection->is_aggregate()) {
+        return do_with(
+                cql3::selection::result_set_builder(*_selection, now,
+                        options.get_serialization_format()),
+                [p, page_size, now](auto& builder) {
+                    return do_until([p] {return p->is_exhausted();},
+                            [p, &builder, page_size, now] {
+                                return p->fetch_page(builder, page_size, now);
+                            }
+                    ).then([&builder] {
+                                auto rs = builder.build();
+                                auto msg = ::make_shared<transport::messages::result_message::rows>(std::move(rs));
+                                return make_ready_future<shared_ptr<transport::messages::result_message>>(std::move(msg));
+                            });
+                });
    }

-    // We can't properly do post-query ordering if we page (see #6722)
    if (needs_post_query_ordering()) {
        throw exceptions::invalid_request_exception(
-              "Cannot page queries with both ORDER BY and a IN restriction on the partition key;"
-              " you must either remove the ORDER BY or the IN and sort client side, or disable paging for this query");
+                "Cannot page queries with both ORDER BY and a IN restriction on the partition key;"
+                        " you must either remove the ORDER BY or the IN and sort client side, or disable paging for this query");
    }

-    return pager->fetch_page(page_size).then([this, pager, &options, limit, now] (auto page) {
-        auto msg = process_results(page, options, limit, now);
+    return p->fetch_page(page_size, now).then(
+            [this, p, &options, limit, now](std::unique_ptr<cql3::result_set> rs) {

-        if (!pager->is_exhausted()) {
-            msg->result->metadata->set_has_more_pages(pager->state());
-        }
+                if (!p->is_exhausted()) {
+                    rs->get_metadata().set_has_more_pages(p->state());
+                }

-        return msg;
-    });
-#endif
+                auto msg = ::make_shared<transport::messages::result_message::rows>(std::move(rs));
+                return make_ready_future<shared_ptr<transport::messages::result_message>>(std::move(msg));
+            });
 }

 future<shared_ptr<transport::messages::result_message>>
@@ -281,114 +329,18 @@ select_statement::execute_internal(distributed<service::storage_proxy>& proxy, s
    }
 }

-// Implements ResultVisitor concept from query.hh
-class result_set_building_visitor {
-    cql3::selection::result_set_builder& builder;
-    select_statement& stmt;
-    uint32_t _row_count;
-    std::vector<bytes> _partition_key;
-    std::vector<bytes> _clustering_key;
-public:
-    result_set_building_visitor(cql3::selection::result_set_builder& builder, select_statement& stmt)
-        : builder(builder)
-        , stmt(stmt)
-        , _row_count(0)
-    { }
-
-    void add_value(const column_definition& def, query::result_row_view::iterator_type& i) {
-        if (def.type->is_multi_cell()) {
-            auto cell = i.next_collection_cell();
-            if (!cell) {
-                builder.add_empty();
-                return;
-            }
-            builder.add(def, *cell);
-        } else {
-            auto cell = i.next_atomic_cell();
-            if (!cell) {
-                builder.add_empty();
-                return;
-            }
-            builder.add(def, *cell);
-        }
-    };
-
-    void accept_new_partition(const partition_key& key, uint32_t row_count) {
-        _partition_key = key.explode(*stmt._schema);
-        _row_count = row_count;
-    }
-
-    void accept_new_partition(uint32_t row_count) {
-        _row_count = row_count;
-    }
-
-    void accept_new_row(const clustering_key& key, const query::result_row_view& static_row,
-            const query::result_row_view& row) {
-        _clustering_key = key.explode(*stmt._schema);
-        accept_new_row(static_row, row);
-    }
-
-    void accept_new_row(const query::result_row_view& static_row, const query::result_row_view& row) {
-        auto static_row_iterator = static_row.iterator();
-        auto row_iterator = row.iterator();
-        builder.new_row();
-        for (auto&& def : stmt._selection->get_columns()) {
-            switch (def->kind) {
-                case column_kind::partition_key:
-                    builder.add(_partition_key[def->component_index()]);
-                    break;
-                case column_kind::clustering_key:
-                    builder.add(_clustering_key[def->component_index()]);
-                    break;
-                case column_kind::regular_column:
-                    add_value(*def, row_iterator);
-                    break;
-                case column_kind::compact_column:
-                    add_value(*def, row_iterator);
-                    break;
-                case column_kind::static_column:
-                    add_value(*def, static_row_iterator);
-                    break;
-                default:
-                    assert(0);
-            }
-        }
-    }
-
-    void accept_partition_end(const query::result_row_view& static_row) {
-        if (_row_count == 0) {
-            builder.new_row();
-            auto static_row_iterator = static_row.iterator();
-            for (auto&& def : stmt._selection->get_columns()) {
-                if (def->is_partition_key()) {
-                    builder.add(_partition_key[def->component_index()]);
-                } else if (def->is_static()) {
-                    add_value(*def, static_row_iterator);
-                } else {
-                    builder.add_empty();
-                }
-            }
-        }
-    }
-};
-
-shared_ptr<transport::messages::result_message>
-select_statement::process_results(foreign_ptr<lw_shared_ptr<query::result>> results, lw_shared_ptr<query::read_command> cmd,
-        const query_options& options, db_clock::time_point now) {
-    cql3::selection::result_set_builder builder(*_selection, now, options.get_serialization_format());
-
-    // FIXME: This special casing saves us the cost of copying an already
-    // linearized response. When we switch views to scattered_reader this will go away.
-    if (results->buf().is_linearized()) {
-        query::result_view view(results->buf().view());
-        view.consume(cmd->slice, result_set_building_visitor(builder, *this));
-    } else {
-        bytes_ostream w(results->buf());
-        query::result_view view(w.linearize());
-        view.consume(cmd->slice, result_set_building_visitor(builder, *this));
-    }
+shared_ptr<transport::messages::result_message> select_statement::process_results(
+        foreign_ptr<lw_shared_ptr<query::result>> results,
+        lw_shared_ptr<query::read_command> cmd, const query_options& options,
+        db_clock::time_point now) {

+    cql3::selection::result_set_builder builder(*_selection, now,
+            options.get_serialization_format());
+    query::result_view::consume(results->buf(), cmd->slice,
+            cql3::selection::result_set_builder::visitor(builder, *_schema,
+                    *_selection));
    auto rs = builder.build();
+
    if (needs_post_query_ordering()) {
        rs->sort(_ordering_comparator);
        if (_is_reversed) {
@@ -399,6 +351,18 @@ select_statement::process_results(foreign_ptr<lw_shared_ptr<query::result>> resu
    return ::make_shared<transport::messages::result_message::rows>(std::move(rs));
 }

+select_statement::raw_statement::raw_statement(::shared_ptr<cf_name> cf_name,
+                                               ::shared_ptr<parameters> parameters,
+                                               std::vector<::shared_ptr<selection::raw_selector>> select_clause,
+                                               std::vector<::shared_ptr<relation>> where_clause,
+                                               ::shared_ptr<term::raw> limit)
+    : cf_statement(std::move(cf_name))  // every argument is a by-value sink and is moved, not copied
+    , _parameters(std::move(parameters))
+    , _select_clause(std::move(select_clause))
+    , _where_clause(std::move(where_clause))
+    , _limit(std::move(limit))
+{ }
+
 ::shared_ptr<parsed_statement::prepared>
 select_statement::raw_statement::prepare(database& db) {
    schema_ptr schema = validation::validate_column_family(db, keyspace(), column_family());
--- a/cql3/statements/select_statement.hh
+++ b/cql3/statements/select_statement.hh
@@ -63,7 +63,6 @@ namespace statements {
 *
 */
 class select_statement : public cql_statement {
-    friend class result_set_building_visitor;
 public:
    class parameters final {
    public:
@@ -73,20 +72,13 @@ public:
        const bool _is_distinct;
        const bool _allow_filtering;
    public:
-        parameters()
-            : _is_distinct{false}
-            , _allow_filtering{false}
-        { }
+        parameters();
        parameters(orderings_type orderings,
            bool is_distinct,
-            bool allow_filtering)
-            : _orderings{std::move(orderings)}
-            , _is_distinct{is_distinct}
-            , _allow_filtering{allow_filtering}
-        { }
-        bool is_distinct() { return _is_distinct; }
-        bool allow_filtering() { return _allow_filtering; }
-        orderings_type const& orderings() { return _orderings; }
+            bool allow_filtering);
+        bool is_distinct();
+        bool allow_filtering();
+        orderings_type const& orderings();
    };
 private:
    static constexpr int DEFAULT_COUNT_PAGE_SIZE = 10000;
@@ -196,13 +188,9 @@ public:
    }
 #endif

-    const sstring& keyspace() const {
-        return _schema->ks_name();
-    }
+    const sstring& keyspace() const;

-    const sstring& column_family() const {
-        return _schema->cf_name();
-    }
+    const sstring& column_family() const;

    query::partition_slice make_partition_slice(const query_options& options);

@@ -458,13 +446,7 @@ public:
            ::shared_ptr<parameters> parameters,
            std::vector<::shared_ptr<selection::raw_selector>> select_clause,
            std::vector<::shared_ptr<relation>> where_clause,
-            ::shared_ptr<term::raw> limit)
-        : cf_statement(std::move(cf_name))
-        , _parameters(std::move(parameters))
-        , _select_clause(std::move(select_clause))
-        , _where_clause(std::move(where_clause))
-        , _limit(std::move(limit))
-    { }
+            ::shared_ptr<term::raw> limit);

    virtual ::shared_ptr<prepared> prepare(database& db) override;
 private:
--- a/cql3/statements/update_statement.cc
+++ b/cql3/statements/update_statement.cc
@@ -48,6 +48,14 @@ namespace cql3 {

 namespace statements {

+update_statement::update_statement(statement_type type, uint32_t bound_terms, schema_ptr s, std::unique_ptr<attributes> attrs)
+    : modification_statement{type, bound_terms, std::move(s), std::move(attrs)} // every argument is forwarded; s and attrs are moved into the base
+{ } // no members of its own are initialised here
+
+bool update_statement::require_full_clustering_key() const {
+    return true; // unconditional: update_statement always answers true
+}
+
 void update_statement::add_update_for_key(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) {
    if (s->is_dense()) {
        if (!prefix || (prefix.size() == 1 && prefix.components().front().empty())) {
@@ -100,6 +108,16 @@ void update_statement::add_update_for_key(mutation& m, const exploded_clustering
 #endif
 }

+update_statement::parsed_insert::parsed_insert(::shared_ptr<cf_name> name,
+                                               ::shared_ptr<attributes::raw> attrs,
+                                               std::vector<::shared_ptr<column_identifier::raw>> column_names,
+                                               std::vector<::shared_ptr<term::raw>> column_values,
+                                               bool if_not_exists)
+    : modification_statement::parsed{std::move(name), std::move(attrs), conditions_vector{}, if_not_exists, false}
+    , _column_names{std::move(column_names)}
+    , _column_values{std::move(column_values)}
+{ } // the base gets an empty conditions_vector, the caller's if_not_exists, and a literal false as its last argument
+
 ::shared_ptr<modification_statement>
 update_statement::parsed_insert::prepare_internal(database& db, schema_ptr schema,
    ::shared_ptr<variable_specifications> bound_names, std::unique_ptr<attributes> attrs)
@@ -148,6 +166,16 @@ update_statement::parsed_insert::prepare_internal(database& db, schema_ptr schem
    return stmt;
 }

+update_statement::parsed_update::parsed_update(::shared_ptr<cf_name> name,
+                                               ::shared_ptr<attributes::raw> attrs,
+                                               std::vector<std::pair<::shared_ptr<column_identifier::raw>, ::shared_ptr<operation::raw_update>>> updates,
+                                               std::vector<relation_ptr> where_clause,
+                                               conditions_vector conditions)
+    : modification_statement::parsed(std::move(name), std::move(attrs), std::move(conditions), false, false)
+    , _updates(std::move(updates))
+    , _where_clause(std::move(where_clause))
+{ } // the caller's conditions are moved into the base; both trailing boolean arguments of the base are fixed to false
+
 ::shared_ptr<modification_statement>
 update_statement::parsed_update::prepare_internal(database& db, schema_ptr schema,
    ::shared_ptr<variable_specifications> bound_names, std::unique_ptr<attributes> attrs)
--- a/cql3/statements/update_statement.hh
+++ b/cql3/statements/update_statement.hh
@@ -64,14 +64,9 @@ public:
    private static final Constants.Value EMPTY = new Constants.Value(ByteBufferUtil.EMPTY_BYTE_BUFFER);
 #endif

-    update_statement(statement_type type, uint32_t bound_terms, schema_ptr s, std::unique_ptr<attributes> attrs)
-        : modification_statement{type, bound_terms, std::move(s), std::move(attrs)}
-    { }
-
+    update_statement(statement_type type, uint32_t bound_terms, schema_ptr s, std::unique_ptr<attributes> attrs);
 private:
-    virtual bool require_full_clustering_key() const override {
-        return true;
-    }
+    virtual bool require_full_clustering_key() const override;

    virtual void add_update_for_key(mutation& m, const exploded_clustering_prefix& prefix, const update_parameters& params) override;
 public:
@@ -92,11 +87,7 @@ public:
                      ::shared_ptr<attributes::raw> attrs,
                      std::vector<::shared_ptr<column_identifier::raw>> column_names,
                      std::vector<::shared_ptr<term::raw>> column_values,
-                      bool if_not_exists)
-            : modification_statement::parsed{std::move(name), std::move(attrs), conditions_vector{}, if_not_exists, false}
-            , _column_names{std::move(column_names)}
-            , _column_values{std::move(column_values)}
-        { }
+                      bool if_not_exists);

        virtual ::shared_ptr<modification_statement> prepare_internal(database& db, schema_ptr schema,
                    ::shared_ptr<variable_specifications> bound_names, std::unique_ptr<attributes> attrs) override;
@@ -122,11 +113,7 @@ public:
            ::shared_ptr<attributes::raw> attrs,
            std::vector<std::pair<::shared_ptr<column_identifier::raw>, ::shared_ptr<operation::raw_update>>> updates,
            std::vector<relation_ptr> where_clause,
-            conditions_vector conditions)
-                : modification_statement::parsed(std::move(name), std::move(attrs), std::move(conditions), false, false)
-                , _updates(std::move(updates))
-                , _where_clause(std::move(where_clause))
-        { }
+            conditions_vector conditions);
    protected:
        virtual ::shared_ptr<modification_statement> prepare_internal(database& db, schema_ptr schema,
                    ::shared_ptr<variable_specifications> bound_names, std::unique_ptr<attributes> attrs);
--- a/cql3/tuples.hh
+++ b/cql3/tuples.hh
@@ -224,14 +224,6 @@ public:
            // We don't "need" that override but it saves us the allocation of a Value object if used
            return options.make_temporary(_type->build_value(bind_internal(options)));
        }
-
-#if 0
-        @Override
-        public String toString()
-        {
-            return tupleToString(elements);
-        }
-#endif
    };

    /**
@@ -259,7 +251,7 @@ public:
            try {
                // Collections have this small hack that validate cannot be called on a serialized object,
                // but the deserialization does the validation (so we're fine).
-                auto l = boost::any_cast<list_type_impl::native_type>(type->deserialize(value, options.get_serialization_format()));
+                auto l = value_cast<list_type_impl::native_type>(type->deserialize(value, options.get_serialization_format()));
                auto ttype = dynamic_pointer_cast<const tuple_type_impl>(type->get_elements_type());
                assert(ttype);

--- a/cql3/untyped_result_set.hh
+++ b/cql3/untyped_result_set.hh
@@ -66,7 +66,7 @@ public:
        }
        template<typename T>
        T get_as(const sstring& name) const {
-            return boost::any_cast<T>(data_type_for<T>()->deserialize(get_blob(name)));
+            return value_cast<T>(data_type_for<T>()->deserialize(get_blob(name)));
        }
        // this could maybe be done as an overload of get_as (or something), but that just
        // muddles things for no real gain. Let user (us) attempt to know what he is doing instead.
@@ -75,12 +75,12 @@ public:
                data_type_for<K>(), data_type valtype =
                data_type_for<V>()) const {
            auto vec =
-                    boost::any_cast<const map_type_impl::native_type&>(
+                    value_cast<map_type_impl::native_type>(
                            map_type_impl::get_instance(keytype, valtype, false)->deserialize(
                                    get_blob(name)));
            std::transform(vec.begin(), vec.end(), out,
                    [](auto& p) {
-                        return std::pair<K, V>(boost::any_cast<const K&>(p.first), boost::any_cast<const V&>(p.second));
+                        return std::pair<K, V>(value_cast<K>(p.first), value_cast<V>(p.second));
                    });
        }
        template<typename K, typename V, typename ... Rest>
--- a/cql3/update_parameters.cc
+++ b/cql3/update_parameters.cc
@@ -43,7 +43,7 @@

 namespace cql3 {

-std::experimental::optional<collection_mutation::view>
+std::experimental::optional<collection_mutation_view>
 update_parameters::get_prefetched_list(
    const partition_key& pkey,
    const clustering_key& row_key,
--- a/cql3/update_parameters.hh
+++ b/cql3/update_parameters.hh
@@ -86,7 +86,7 @@ public:
                return pk_eq(k1.first, k2.first) && ck_eq(k1.second, k2.second);
            }
        };
-        using row = std::unordered_map<column_id, collection_mutation::one>;
+        using row = std::unordered_map<column_id, collection_mutation>;
    public:
        std::unordered_map<key, row, key_hashing, key_equality> rows;
        schema_ptr schema;
@@ -183,7 +183,7 @@ public:
        return _timestamp;
    }

-    std::experimental::optional<collection_mutation::view> get_prefetched_list(
+    std::experimental::optional<collection_mutation_view> get_prefetched_list(
        const partition_key& pkey, const clustering_key& row_key, const column_definition& column) const;
 };

--- a/cql3/variable_specifications.hh
+++ b/cql3/variable_specifications.hh
@@ -88,14 +88,6 @@ public:
        }
        _specs[bind_index] = spec;
    }
-
-#if 0
-    @Override
-    public String toString()
-    {
-        return Arrays.toString(specs);
-    }
-#endif
 };

 }
--- a/database.cc
+++ b/database.cc
@@ -416,6 +416,23 @@ static std::vector<sstring> parse_fname(sstring filename) {
    return comps;
 }

+// True when engine().cpu_id() lies between the shards that dht::shard_of() yields for first and last (inclusive).
+static bool belongs_to_current_shard(const schema& s, const partition_key& first, const partition_key& last) {
+    auto shard_for = [&s] (const partition_key& key) {
+        return dht::shard_of(dht::global_partitioner().get_token(s, key));
+    };
+    const auto lowest = shard_for(first);
+    const auto highest = shard_for(last);
+    const auto this_shard = engine().cpu_id();
+    return !(this_shard < lowest || highest < this_shard);
+}
+
+static bool belongs_to_current_shard(const schema& s, range<partition_key> r) {
+    assert(r.start()); // both bounds are dereferenced below, so each must be set
+    assert(r.end());
+    return belongs_to_current_shard(s, r.start()->value(), r.end()->value()); // delegates to the key-pair overload
+}
+
 future<sstables::entry_descriptor> column_family::probe_file(sstring sstdir, sstring fname) {

    using namespace sstables;
@@ -432,19 +449,29 @@ future<sstables::entry_descriptor> column_family::probe_file(sstring sstdir, sst
    update_sstables_known_generation(comps.generation);
    assert(_sstables->count(comps.generation) == 0);

-    auto sst = std::make_unique<sstables::sstable>(_schema->ks_name(), _schema->cf_name(), sstdir, comps.generation, comps.version, comps.format);
-    auto fut = sst->load();
-    return std::move(fut).then([this, sst = std::move(sst)] () mutable {
-        add_sstable(std::move(*sst));
-        return make_ready_future<>();
-    }).then_wrapped([fname, comps = std::move(comps)] (future<> f) {
+    auto fut = sstable::get_sstable_key_range(*_schema, _schema->ks_name(), _schema->cf_name(), sstdir, comps.generation, comps.version, comps.format);
+    return std::move(fut).then([this, sstdir = std::move(sstdir), comps] (range<partition_key> r) {
+        // Checks whether or not sstable belongs to current shard.
+        if (!belongs_to_current_shard(*_schema, std::move(r))) {
+            sstable::mark_sstable_for_deletion(_schema->ks_name(), _schema->cf_name(), sstdir, comps.generation, comps.version, comps.format);
+            return make_ready_future<>();
+        }
+
+        auto sst = std::make_unique<sstables::sstable>(_schema->ks_name(), _schema->cf_name(), sstdir, comps.generation, comps.version, comps.format);
+        auto fut = sst->load();
+        return std::move(fut).then([this, sst = std::move(sst)] () mutable {
+            add_sstable(std::move(*sst));
+            return make_ready_future<>();
+        });
+    }).then_wrapped([fname, comps] (future<> f) {
        try {
            f.get();
        } catch (malformed_sstable_exception& e) {
            dblog.error("malformed sstable {}: {}. Refusing to boot", fname, e.what());
            throw;
        } catch(...) {
-            dblog.error("Unrecognized error while processing {}: Refusing to boot", fname);
+            dblog.error("Unrecognized error while processing {}: {}. Refusing to boot",
+                    fname, std::current_exception());
            throw;
        }
        return make_ready_future<entry_descriptor>(std::move(comps));
@@ -462,19 +489,6 @@ void column_family::add_sstable(sstables::sstable&& sstable) {
 }

 void column_family::add_sstable(lw_shared_ptr<sstables::sstable> sstable) {
-    auto key_shard = [this] (const partition_key& pk) {
-        auto token = dht::global_partitioner().get_token(*_schema, pk);
-        return dht::shard_of(token);
-    };
-    auto s1 = key_shard(sstable->get_first_partition_key(*_schema));
-    auto s2 = key_shard(sstable->get_last_partition_key(*_schema));
-    auto me = engine().cpu_id();
-    auto included = (s1 <= me) && (me <= s2);
-    if (!included) {
-        dblog.info("sstable {} not relevant for this shard, ignoring", sstable->get_filename());
-        sstable->mark_for_deletion();
-        return;
-    }
    auto generation = sstable->generation();
    // allow in-progress reads to continue using old list
    _sstables = make_lw_shared<sstable_list>(*_sstables);
@@ -546,6 +560,10 @@ column_family::try_flush_memtable_to_sstable(lw_shared_ptr<memtable> old) {
        sstables::sstable::version_types::ka,
        sstables::sstable::format_types::big);

+    auto memtable_size = old->occupancy().total_space();
+
+    _config.cf_stats->pending_memtables_flushes_count++;
+    _config.cf_stats->pending_memtables_flushes_bytes += memtable_size;
    newtab->set_unshared();
    dblog.debug("Flushing to {}", newtab->get_filename());
    return newtab->write_components(*old).then([this, newtab, old] {
@@ -569,23 +587,33 @@ column_family::try_flush_memtable_to_sstable(lw_shared_ptr<memtable> old) {
                return newtab->create_links(dir);
            });
        });
-    }).then([this, old, newtab] {
+    }).then_wrapped([this, old, newtab, memtable_size] (future<> ret) {
+        _config.cf_stats->pending_memtables_flushes_count--;
+        _config.cf_stats->pending_memtables_flushes_bytes -= memtable_size;
        dblog.debug("Flushing done");
-        // We must add sstable before we call update_cache(), because
-        // memtable's data after moving to cache can be evicted at any time.
-        auto old_sstables = _sstables;
-        add_sstable(newtab);
-        old->mark_flushed(newtab);
-        return update_cache(*old, std::move(old_sstables));
-    }).then_wrapped([this, old] (future<> ret) {
        try {
            ret.get();

-            _memtables->erase(boost::range::find(*_memtables, old));
-            dblog.debug("Memtable replaced");
+            // We must add sstable before we call update_cache(), because
+            // memtable's data after moving to cache can be evicted at any time.
+            auto old_sstables = _sstables;
+            add_sstable(newtab);
+            old->mark_flushed(newtab);
+
            trigger_compaction();

-            return make_ready_future<stop_iteration>(stop_iteration::yes);
+            return update_cache(*old, std::move(old_sstables)).then_wrapped([this, old] (future<> f) {
+                try {
+                    f.get();
+                } catch(...) {
+                    dblog.error("failed to move memtable to cache: {}", std::current_exception());
+                }
+
+                _memtables->erase(boost::range::find(*_memtables, old));
+                dblog.debug("Memtable replaced");
+
+                return make_ready_future<stop_iteration>(stop_iteration::yes);
+            });
        } catch (...) {
            dblog.error("failed to write sstable: {}", std::current_exception());
        }
@@ -704,21 +732,22 @@ column_family::compact_sstables(sstables::compaction_descriptor descriptor) {
            std::unordered_set<sstables::shared_sstable> s(
                    sstables_to_compact->begin(), sstables_to_compact->end());
            for (const auto& oldtab : *current_sstables) {
+                // Checks if oldtab is a sstable not being compacted.
                if (!s.count(oldtab.second)) {
                    update_stats_for_new_sstable(oldtab.second->data_size());
                    _sstables->emplace(oldtab.first, oldtab.second);
                }
+            }

-                for (const auto& newtab : *new_tables) {
-                    // FIXME: rename the new sstable(s). Verify a rename doesn't cause
-                    // problems for the sstable object.
-                    update_stats_for_new_sstable(newtab.second->data_size());
-                    _sstables->emplace(newtab.first, newtab.second);
-                }
+            for (const auto& newtab : *new_tables) {
+                // FIXME: rename the new sstable(s). Verify a rename doesn't cause
+                // problems for the sstable object.
+                update_stats_for_new_sstable(newtab.second->data_size());
+                _sstables->emplace(newtab.first, newtab.second);
+            }

-                for (const auto& oldtab : *sstables_to_compact) {
-                    oldtab->mark_for_deletion();
-                }
+            for (const auto& oldtab : *sstables_to_compact) {
+                oldtab->mark_for_deletion();
            }
        });
    });
@@ -731,7 +760,13 @@ column_family::load_new_sstables(std::vector<sstables::entry_descriptor> new_tab
        return sst->load().then([this, sst] {
            return sst->mutate_sstable_level(0);
        }).then([this, sst] {
-            this->add_sstable(sst);
+            auto first = sst->get_first_partition_key(*_schema);
+            auto last = sst->get_last_partition_key(*_schema);
+            if (belongs_to_current_shard(*_schema, first, last)) {
+                this->add_sstable(sst);
+            } else {
+                sst->mark_for_deletion();
+            }
            return make_ready_future<>();
        });
    });
@@ -823,58 +858,77 @@ future<> column_family::populate(sstring sstdir) {
    auto verifier = make_lw_shared<std::unordered_map<unsigned long, status>>();
    auto descriptor = make_lw_shared<sstable_descriptor>();

-    return lister::scan_dir(sstdir, { directory_entry_type::regular }, [this, sstdir, verifier, descriptor] (directory_entry de) {
-        // FIXME: The secondary indexes are in this level, but with a directory type, (starting with ".")
-        return probe_file(sstdir, de.name).then([verifier, descriptor] (auto entry) {
-            if (verifier->count(entry.generation)) {
-                if (verifier->at(entry.generation) == status::has_toc_file) {
-                    if (entry.component == sstables::sstable::component_type::TOC) {
-                        throw sstables::malformed_sstable_exception("Invalid State encountered. TOC file already processed");
+    return do_with(std::vector<future<>>(), [this, sstdir, verifier, descriptor] (std::vector<future<>>& futures) {
+        return lister::scan_dir(sstdir, { directory_entry_type::regular }, [this, sstdir, verifier, descriptor, &futures] (directory_entry de) {
+            // FIXME: The secondary indexes are in this level, but with a directory type, (starting with ".")
+            auto f = probe_file(sstdir, de.name).then([verifier, descriptor] (auto entry) {
+                if (verifier->count(entry.generation)) {
+                    if (verifier->at(entry.generation) == status::has_toc_file) {
+                        if (entry.component == sstables::sstable::component_type::TOC) {
+                            throw sstables::malformed_sstable_exception("Invalid State encountered. TOC file already processed");
+                        } else if (entry.component == sstables::sstable::component_type::TemporaryTOC) {
+                            throw sstables::malformed_sstable_exception("Invalid State encountered. Temporary TOC file found after TOC file was processed");
+                        }
+                    } else if (entry.component == sstables::sstable::component_type::TOC) {
+                        verifier->at(entry.generation) = status::has_toc_file;
                    } else if (entry.component == sstables::sstable::component_type::TemporaryTOC) {
-                        throw sstables::malformed_sstable_exception("Invalid State encountered. Temporary TOC file found after TOC file was processed");
+                        verifier->at(entry.generation) = status::has_temporary_toc_file;
                    }
-                } else if (entry.component == sstables::sstable::component_type::TOC) {
-                    verifier->at(entry.generation) = status::has_toc_file;
-                } else if (entry.component == sstables::sstable::component_type::TemporaryTOC) {
-                    verifier->at(entry.generation) = status::has_temporary_toc_file;
-                }
-            } else {
-                if (entry.component == sstables::sstable::component_type::TOC) {
-                    verifier->emplace(entry.generation, status::has_toc_file);
-                } else if (entry.component == sstables::sstable::component_type::TemporaryTOC) {
-                    verifier->emplace(entry.generation, status::has_temporary_toc_file);
                } else {
-                    verifier->emplace(entry.generation, status::has_some_file);
+                    if (entry.component == sstables::sstable::component_type::TOC) {
+                        verifier->emplace(entry.generation, status::has_toc_file);
+                    } else if (entry.component == sstables::sstable::component_type::TemporaryTOC) {
+                        verifier->emplace(entry.generation, status::has_temporary_toc_file);
+                    } else {
+                        verifier->emplace(entry.generation, status::has_some_file);
+                    }
                }
-            }

-            // Retrieve both version and format used for this column family.
-            if (!descriptor->version) {
-                descriptor->version = entry.version;
-            }
-            if (!descriptor->format) {
-                descriptor->format = entry.format;
-            }
-        });
-    }).then([verifier, sstdir, descriptor, this] {
-        return parallel_for_each(*verifier, [sstdir = std::move(sstdir), descriptor, this] (auto v) {
-            if (v.second == status::has_temporary_toc_file) {
-                unsigned long gen = v.first;
-                assert(descriptor->version);
-                sstables::sstable::version_types version = descriptor->version.value();
-                assert(descriptor->format);
-                sstables::sstable::format_types format = descriptor->format.value();
-
-                if (engine().cpu_id() != 0) {
-                    dblog.info("At directory: {}, partial SSTable with generation {} not relevant for this shard, ignoring", sstdir, v.first);
-                    return make_ready_future<>();
+                // Retrieve both version and format used for this column family.
+                if (!descriptor->version) {
+                    descriptor->version = entry.version;
                }
-                // shard 0 is the responsible for removing a partial sstable.
-                return sstables::sstable::remove_sstable_with_temp_toc(_schema->ks_name(), _schema->cf_name(), sstdir, gen, version, format);
-            } else if (v.second != status::has_toc_file) {
-                throw sstables::malformed_sstable_exception(sprint("At directory: %s: no TOC found for SSTable with generation %d!. Refusing to boot", sstdir, v.first));
-            }
+                if (!descriptor->format) {
+                    descriptor->format = entry.format;
+                }
+            });
+
+            // push future returned by probe_file into an array of futures,
+            // so that the supplied callback will not block scan_dir() from
+            // reading the next entry in the directory.
+            futures.push_back(std::move(f));
+
            return make_ready_future<>();
+        }).then([&futures] {
+            return when_all(futures.begin(), futures.end()).then([] (std::vector<future<>> ret) {
+                // Examine every probe result so that no failed future is dropped
+                // unobserved, then rethrow the first failure that was seen.
+                std::exception_ptr first_error;
+                for (auto& f : ret) {
+                    try { f.get(); } catch (...) { if (!first_error) { first_error = std::current_exception(); } }
+                }
+                if (first_error) { std::rethrow_exception(first_error); }
+            });
+        }).then([verifier, sstdir, descriptor, this] {
+            return parallel_for_each(*verifier, [sstdir = std::move(sstdir), descriptor, this] (auto v) {
+                if (v.second == status::has_temporary_toc_file) {
+                    unsigned long gen = v.first;
+                    assert(descriptor->version);
+                    sstables::sstable::version_types version = descriptor->version.value();
+                    assert(descriptor->format);
+                    sstables::sstable::format_types format = descriptor->format.value();
+
+                    if (engine().cpu_id() != 0) {
+                        dblog.info("At directory: {}, partial SSTable with generation {} not relevant for this shard, ignoring", sstdir, v.first);
+                        return make_ready_future<>();
+                    }
+                    // shard 0 is the responsible for removing a partial sstable.
+                    return sstables::sstable::remove_sstable_with_temp_toc(_schema->ks_name(), _schema->cf_name(), sstdir, gen, version, format);
+                } else if (v.second != status::has_toc_file) {
+                    throw sstables::malformed_sstable_exception(sprint("At directory: %s: no TOC found for SSTable with generation %d!. Refusing to boot", sstdir, v.first));
+                }
+                return make_ready_future<>();
+            });
        });
    });
 }
@@ -910,6 +964,20 @@ database::setup_collectd() {
                , scollectd::make_typed(scollectd::data_type::GAUGE, [this] {
            return _dirty_memory_region_group.memory_used();
    })));
+
+    _collectd.push_back(
+        scollectd::add_polled_metric(scollectd::type_instance_id("memtables"
+                , scollectd::per_cpu_plugin_instance
+                , "queue_length", "pending_flushes")
+                , scollectd::make_typed(scollectd::data_type::GAUGE, _cf_stats.pending_memtables_flushes_count)
+    ));
+
+    _collectd.push_back(
+        scollectd::add_polled_metric(scollectd::type_instance_id("memtables"
+                , scollectd::per_cpu_plugin_instance
+                , "bytes", "pending_flushes")
+                , scollectd::make_typed(scollectd::data_type::GAUGE, _cf_stats.pending_memtables_flushes_bytes)
+    ));
 }

 database::~database() {
@@ -968,7 +1036,7 @@ template <typename Func>
 static future<>
 do_parse_system_tables(distributed<service::storage_proxy>& proxy, const sstring& _cf_name, Func&& func) {
    using namespace db::schema_tables;
-    static_assert(std::is_same<future<>, std::result_of_t<Func(schema_result::value_type&)>>::value,
+    static_assert(std::is_same<future<>, std::result_of_t<Func(schema_result_value_type&)>>::value,
                  "bad Func signature");


@@ -1003,11 +1071,11 @@ do_parse_system_tables(distributed<service::storage_proxy>& proxy, const sstring

 future<> database::parse_system_tables(distributed<service::storage_proxy>& proxy) {
    using namespace db::schema_tables;
-    return do_parse_system_tables(proxy, db::schema_tables::KEYSPACES, [this] (schema_result::value_type &v) {
+    return do_parse_system_tables(proxy, db::schema_tables::KEYSPACES, [this] (schema_result_value_type &v) {
        auto ksm = create_keyspace_from_schema_partition(v);
        return create_keyspace(ksm);
    }).then([&proxy, this] {
-        return do_parse_system_tables(proxy, db::schema_tables::COLUMNFAMILIES, [this, &proxy] (schema_result::value_type &v) {
+        return do_parse_system_tables(proxy, db::schema_tables::COLUMNFAMILIES, [this, &proxy] (schema_result_value_type &v) {
            return create_tables_from_tables_partition(proxy, v.second).then([this] (std::map<sstring, schema_ptr> tables) {
                for (auto& t: tables) {
                    auto s = t.second;
@@ -1079,7 +1147,7 @@ void database::add_keyspace(sstring name, keyspace k) {
 }

 void database::update_keyspace(const sstring& name) {
-    throw std::runtime_error("not implemented");
+    throw std::runtime_error("update keyspace not implemented");
 }

 void database::drop_keyspace(const sstring& name) {
@@ -1244,6 +1312,7 @@ keyspace::make_column_family_config(const schema& s) const {
    cfg.enable_cache = _config.enable_cache;
    cfg.max_memtable_size = _config.max_memtable_size;
    cfg.dirty_memory_region_group = _config.dirty_memory_region_group;
+    cfg.cf_stats = _config.cf_stats;
    cfg.enable_incremental_backups = _config.enable_incremental_backups;

    return cfg;
@@ -1416,7 +1485,7 @@ column_family::query(const query::read_command& cmd, const std::vector<query::pa
            return do_until([&qs] { return !qs.limit || qs.range_empty; }, [this, &qs] {
                return qs.reader().then([this, &qs](mutation_opt mo) {
                    if (mo) {
-                        auto p_builder = qs.builder.add_partition(mo->key());
+                        auto p_builder = qs.builder.add_partition(*mo->schema(), mo->key());
                        auto is_distinct = qs.cmd.slice.options.contains(query::partition_slice::option::distinct);
                        auto limit = !is_distinct ? qs.limit : 1;
                        mo->partition().query(p_builder, *_schema, qs.cmd.timestamp, limit);
@@ -1433,7 +1502,7 @@ column_family::query(const query::read_command& cmd, const std::vector<query::pa
    }).finally([lc, this]() mutable {
        _stats.reads.mark(lc);
        if (lc.is_start()) {
-            _stats.estimated_read.add(lc.latency_in_nano(), _stats.reads.count);
+            _stats.estimated_read.add(lc.latency(), _stats.reads.count);
        }
    });
 }
@@ -1447,28 +1516,51 @@ column_family::as_mutation_source() const {

 future<lw_shared_ptr<query::result>>
 database::query(const query::read_command& cmd, const std::vector<query::partition_range>& ranges) {
-    static auto make_empty = [] {
-        return make_ready_future<lw_shared_ptr<query::result>>(make_lw_shared(query::result()));
-    };
-
-    try {
-        column_family& cf = find_column_family(cmd.cf_id);
-        return cf.query(cmd, ranges);
-    } catch (const no_such_column_family&) {
-        // FIXME: load from sstables
-        return make_empty();
-    }
+    column_family& cf = find_column_family(cmd.cf_id);
+    return cf.query(cmd, ranges);
 }

 future<reconcilable_result>
 database::query_mutations(const query::read_command& cmd, const query::partition_range& range) {
+    column_family& cf = find_column_family(cmd.cf_id); // table is looked up by the command's cf_id; a failed lookup is not handled here
+    return mutation_query(cf.as_mutation_source(), range, cmd.slice, cmd.row_limit, cmd.timestamp); // slice, row limit and timestamp all come from cmd
+}
+
+std::unordered_set<sstring> database::get_initial_tokens() {
+    std::unordered_set<sstring> tokens;
+    sstring tokens_string = get_config().initial_token();
    try {
-        column_family& cf = find_column_family(cmd.cf_id);
-        return mutation_query(cf.as_mutation_source(), range, cmd.slice, cmd.row_limit, cmd.timestamp);
-    } catch (const no_such_column_family&) {
-        // FIXME: load from sstables
-        return make_ready_future<reconcilable_result>(reconcilable_result());
+        boost::split(tokens, tokens_string, boost::is_any_of(sstring(",")));
+    } catch (...) {
+        throw std::runtime_error(sprint("Unable to parse initial_token=%s", tokens_string));
    }
+    tokens.erase("");
+    return tokens;
+}
+
+// Address of the node to replace: replace_address, else replace_address_first_boot; nullopt when neither is usable.
+std::experimental::optional<gms::inet_address> database::get_replace_address() {
+    sstring addr = get_config().replace_address();
+    if (addr.empty()) {
+        addr = get_config().replace_address_first_boot();
+    }
+    try {
+        if (!addr.empty()) {
+            return gms::inet_address(addr);
+        }
+    } catch (...) {
+        dblog.error("Ignoring invalid replace address {}: {}", addr, std::current_exception());
+    }
+    return std::experimental::nullopt;
+}
+
+bool database::is_replacing() {
+    sstring replace_address_first_boot = get_config().replace_address_first_boot();
+    if (!replace_address_first_boot.empty() && db::system_keyspace::bootstrap_complete()) { // checked first, without looking at replace_address
+        dblog.info("Replace address on first boot requested; this node is already bootstrapped");
+        return false;
+    }
+    return bool(get_replace_address()); // otherwise replacing iff get_replace_address() yields an address
 }

 std::ostream& operator<<(std::ostream& out, const atomic_cell_or_collection& c) {
@@ -1500,8 +1592,7 @@ future<> database::apply_in_memory(const frozen_mutation& m, const db::replay_po
        auto& cf = find_column_family(m.column_family_id());
        cf.apply(m, rp);
    } catch (no_such_column_family&) {
-        // TODO: log a warning
-        // FIXME: load keyspace meta-data from storage
+        dblog.error("Attempting to mutate non-existent table {}", m.column_family_id());
    }
    return make_ready_future<>();
 }
@@ -1589,6 +1680,7 @@ database::make_keyspace_config(const keyspace_metadata& ksm) {
        cfg.max_memtable_size = std::numeric_limits<size_t>::max();
    }
    cfg.dirty_memory_region_group = &_dirty_memory_region_group;
+    cfg.cf_stats = &_cf_stats;
    cfg.enable_incremental_backups = _cfg->incremental_backups();
    return cfg;
 }
@@ -1857,7 +1949,7 @@ future<> column_family::snapshot(sstring name) {
 }

 future<bool> column_family::snapshot_exists(sstring tag) {
-    sstring jsondir = _config.datadir + "/snapshots/";
+    sstring jsondir = _config.datadir + "/snapshots/" + tag;
    return engine().open_directory(std::move(jsondir)).then_wrapped([] (future<file> f) {
        try {
            f.get0();
@@ -1933,7 +2025,11 @@ future<> column_family::clear_snapshot(sstring tag) {
 future<std::unordered_map<sstring, column_family::snapshot_details>> column_family::get_snapshot_details() {
    std::unordered_map<sstring, snapshot_details> all_snapshots;
    return do_with(std::move(all_snapshots), [this] (auto& all_snapshots) {
-        return lister::scan_dir(_config.datadir + "/snapshots",  { directory_entry_type::directory }, [this, &all_snapshots] (directory_entry de) {
+        return engine().file_exists(_config.datadir + "/snapshots").then([this, &all_snapshots](bool file_exists) {
+            if (!file_exists) {
+                return make_ready_future<>();
+            }
+            return lister::scan_dir(_config.datadir + "/snapshots",  { directory_entry_type::directory }, [this, &all_snapshots] (directory_entry de) {
            auto snapshot_name = de.name;
            auto snapshot = _config.datadir + "/snapshots/" + snapshot_name;
            all_snapshots.emplace(snapshot_name, snapshot_details());
@@ -1968,6 +2064,7 @@ future<std::unordered_map<sstring, column_family::snapshot_details>> column_fami
                    });
                });
            });
+        });
        }).then([&all_snapshots] {
            return std::move(all_snapshots);
        });
--- a/database.hh
+++ b/database.hh
@@ -102,6 +102,16 @@ class replay_position_reordered_exception : public std::exception {};
 using memtable_list = std::vector<lw_shared_ptr<memtable>>;
 using sstable_list = sstables::sstable_list;

+// The CF has a "stats" structure. But we don't want all fields here,
+// since some of them are fairly complex for exporting to collectd. Also,
+// that structure matches what we export via the API, so better leave it
+// untouched. And we need more fields. We will summarize in here only what
+// we need.
+struct cf_stats {
+    int64_t pending_memtables_flushes_count = 0;
+    int64_t pending_memtables_flushes_bytes = 0;
+};
+
 class column_family {
 public:
    struct config {
@@ -113,6 +123,7 @@ public:
        bool enable_incremental_backups = false;
        size_t max_memtable_size = 5'000'000;
        logalloc::region_group* dirty_memory_region_group = nullptr;
+        ::cf_stats* cf_stats = nullptr;
    };
    struct no_commitlog {};
    struct stats {
@@ -183,8 +194,7 @@ private:
    mutation_source sstables_as_mutation_source();
    key_source sstables_as_key_source() const;
    partition_presence_checker make_partition_presence_checker(lw_shared_ptr<sstable_list> old_sstables);
-    // We will use highres because hopefully it won't take more than a few usecs
-    std::chrono::high_resolution_clock::time_point _sstable_writes_disabled_at;
+    std::chrono::steady_clock::time_point _sstable_writes_disabled_at;
 public:
    // Creates a mutation reader which covers all data sources for this column family.
    // Caller needs to ensure that column_family remains live (FIXME: relax this).
@@ -205,6 +215,10 @@ public:
        return _cache;
    }

+    row_cache& get_row_cache() {
+        return _cache;
+    }
+
    logalloc::occupancy_stats occupancy() const;
 public:
    column_family(schema_ptr schema, config cfg, db::commitlog& cl, compaction_manager&);
@@ -236,7 +250,7 @@ public:
    // to call this separately in all shards first, to guarantee that none of them are writing
    // new data before you can safely assume that the whole node is disabled.
    future<int64_t> disable_sstable_write() {
-        _sstable_writes_disabled_at = std::chrono::high_resolution_clock::now();
+        _sstable_writes_disabled_at = std::chrono::steady_clock::now();
        return _sstables_lock.write_lock().then([this] {
            return make_ready_future<int64_t>((*_sstables->end()).first);
        });
@@ -244,10 +258,10 @@ public:

    // SSTable writes are now allowed again, and generation is updated to new_generation
    // returns the amount of microseconds elapsed since we disabled writes.
-    std::chrono::high_resolution_clock::duration enable_sstable_write(int64_t new_generation) {
+    std::chrono::steady_clock::duration enable_sstable_write(int64_t new_generation) {
        update_sstables_known_generation(new_generation);
        _sstables_lock.write_unlock();
-        return std::chrono::high_resolution_clock::now() - _sstable_writes_disabled_at;
+        return std::chrono::steady_clock::now() - _sstable_writes_disabled_at;
    }

    // Make sure the generation numbers are sequential, starting from "start".
@@ -310,6 +324,10 @@ public:
        return _stats;
    }

+    compaction_manager& get_compaction_manager() const {
+        return _compaction_manager;
+    }
+
    template<typename Func, typename Result = futurize_t<std::result_of_t<Func()>>>
    Result run_with_compaction_disabled(Func && func) {
        ++_compaction_disabled;
@@ -445,6 +463,7 @@ public:
        bool enable_incremental_backups = false;
        size_t max_memtable_size = 5'000'000;
        logalloc::region_group* dirty_memory_region_group = nullptr;
+        ::cf_stats* cf_stats = nullptr;
    };
 private:
    std::unique_ptr<locator::abstract_replication_strategy> _replication_strategy;
@@ -503,6 +522,7 @@ public:
 //   use shard_of() for data

 class database {
+    ::cf_stats _cf_stats;
    logalloc::region_group _dirty_memory_region_group;
    std::unordered_map<sstring, keyspace> _keyspaces;
    std::unordered_map<utils::UUID, lw_shared_ptr<column_family>> _column_families;
@@ -549,6 +569,9 @@ public:
        return _commitlog.get();
    }

+    compaction_manager& get_compaction_manager() {
+        return _compaction_manager;
+    }
    const compaction_manager& get_compaction_manager() const {
        return _compaction_manager;
    }
@@ -633,6 +656,10 @@ public:
    const logalloc::region_group& dirty_memory_region_group() const {
        return _dirty_memory_region_group;
    }
+
+    std::unordered_set<sstring> get_initial_tokens();
+    std::experimental::optional<gms::inet_address> get_replace_address();
+    bool is_replacing();
 };

 // FIXME: stub
@@ -647,7 +674,7 @@ column_family::apply(const mutation& m, const db::replay_position& rp) {
    seal_on_overflow();
    _stats.writes.mark(lc);
    if (lc.is_start()) {
-        _stats.estimated_write.add(lc.latency_in_nano(), _stats.writes.count);
+        _stats.estimated_write.add(lc.latency(), _stats.writes.count);
    }
 }

@@ -681,7 +708,7 @@ column_family::apply(const frozen_mutation& m, const db::replay_position& rp) {
    seal_on_overflow();
    _stats.writes.mark(lc);
    if (lc.is_start()) {
-        _stats.estimated_write.add(lc.latency_in_nano(), _stats.writes.count);
+        _stats.estimated_write.add(lc.latency(), _stats.writes.count);
    }
 }

--- a/database_fwd.hh
+++ b/database_fwd.hh
@@ -35,8 +35,8 @@ class column_definition;
 // keys.hh
 class exploded_clustering_prefix;
 class partition_key;
-class clustering_key;
 class clustering_key_prefix;
+using clustering_key = clustering_key_prefix;

 // memtable.hh
 class memtable;
--- a/db/batchlog_manager.cc
+++ b/db/batchlog_manager.cc
@@ -56,6 +56,7 @@
 #include "unimplemented.hh"
 #include "db/config.hh"
 #include "gms/failure_detector.hh"
+#include "service/storage_service.hh"

 static logging::logger logger("batchlog_manager");

@@ -87,10 +88,8 @@ future<> db::batchlog_manager::start() {
                                );
                            });
                });
-        _timer.arm(
-                lowres_clock::now()
-                        + std::chrono::milliseconds(
-                                service::storage_service::RING_DELAY));
+        auto ring_delay = service::get_local_storage_service().get_ring_delay();
+        _timer.arm(lowres_clock::now() + ring_delay);
    }
    return make_ready_future<>();
 }
@@ -115,7 +114,7 @@ mutation db::batchlog_manager::get_batch_log_mutation_for(const std::vector<muta
 mutation db::batchlog_manager::get_batch_log_mutation_for(const std::vector<mutation>& mutations, const utils::UUID& id, int32_t version, db_clock::time_point now) {
    auto schema = _qp.db().local().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG);
    auto key = partition_key::from_singular(*schema, id);
-    auto timestamp = db_clock::now_in_usecs();
+    auto timestamp = api::new_timestamp();
    auto data = [this, &mutations] {
        std::vector<frozen_mutation> fm(mutations.begin(), mutations.end());
        const auto size = std::accumulate(fm.begin(), fm.end(), size_t(0), [](size_t s, auto& m) {
@@ -132,7 +131,7 @@ mutation db::batchlog_manager::get_batch_log_mutation_for(const std::vector<muta
    mutation m(key, schema);
    m.set_cell({}, to_bytes("version"), version, timestamp);
    m.set_cell({}, to_bytes("written_at"), now, timestamp);
-    m.set_cell({}, to_bytes("data"), std::move(data), timestamp);
+    m.set_cell({}, to_bytes("data"), data_value(std::move(data)), timestamp);

    return m;
 }
--- a/db/commitlog/commitlog.cc
+++ b/db/commitlog/commitlog.cc
@@ -55,6 +55,7 @@
 #include <core/rwlock.hh>
 #include <core/gate.hh>
 #include <core/fstream.hh>
+#include <seastar/core/memory.hh>
 #include <net/byteorder.hh>

 #include "commitlog.hh"
@@ -89,7 +90,7 @@ public:

 db::commitlog::config::config(const db::config& cfg)
    : commit_log_location(cfg.commitlog_directory())
-    , commitlog_total_space_in_mb(cfg.commitlog_total_space_in_mb())
+    , commitlog_total_space_in_mb(cfg.commitlog_total_space_in_mb() >= 0 ? cfg.commitlog_total_space_in_mb() : memory::stats().total_memory() >> 20)
    , commitlog_segment_size_in_mb(cfg.commitlog_segment_size_in_mb())
    , commitlog_sync_period_in_ms(cfg.commitlog_sync_batch_window_in_ms())
    , mode(cfg.commitlog_sync() == "batch" ? sync_mode::BATCH : sync_mode::PERIODIC)
@@ -280,6 +281,43 @@ private:
 * A single commit log file on disk. Manages creation of the file and writing mutations to disk,
 * as well as tracking the last mutation position of any "dirty" CFs covered by the segment file. Segment
 * files are initially allocated to a fixed size and can grow to accomidate a larger value if necessary.
+ *
+ * The IO flow is somewhat convoluted and goes something like this:
+ *
+ * Mutation path:
+ *  - Adding data to the segment usually writes into the internal buffer
+ *  - On EOB or overflow we issue a write to disk ("cycle").
+ *      - A cycle call will acquire the segment read lock and send the
+ *        buffer to the corresponding position in the file
+ *  - If we are periodic and crossed a timing threshold, or running "batch" mode
+ *    we might be forced to issue a flush ("sync") after adding data
+ *      - A sync call acquires the write lock, thus locking out writes
+ *        and waiting for pending writes to finish. It then checks the
+ *        high data mark, and issues the actual file flush.
+ *        Note that the write lock is released prior to issuing the
+ *        actual file flush, thus we are allowed to write data
+ *        after a flush point concurrently with a pending flush.
+ *
+ * Sync timer:
+ *  - In periodic mode, we try to primarily issue sync calls in
+ *    a timer task issued every N seconds. The timer does the same
+ *    operation as the above described sync, and resets the timeout
+ *    so that mutation path will not trigger syncs and delay.
+ *
+ * Note that we do not care which order segment chunks finish writing
+ * to disk, other than all below a flush point must finish before flushing.
+ *
+ * We currently do not wait for flushes to finish before issuing the next
+ * cycle call ("after" flush point in the file). This might not be optimal.
+ *
+ * To close and finish a segment, we first close the gate object that guards
+ * writing data to it, then flush it fully (including waiting for futures created
+ * by the timer to run their course), and finally wait for it to
+ * become "clean", i.e. get notified that all mutations it holds have been
+ * persisted to sstables elsewhere. Once this is done, we can delete the
+ * segment. If a segment (object) is deleted without being fully clean, we
+ * do not remove the file on disk.
+ *
 */

 class db::commitlog::segment: public enable_lw_shared_from_this<segment> {
@@ -315,7 +353,8 @@ public:
    // The commit log entry overhead in bytes (int: length + int: head checksum + int: tail checksum)
    static constexpr size_t entry_overhead_size = 3 * sizeof(uint32_t);
    static constexpr size_t segment_overhead_size = 2 * sizeof(uint32_t);
-    static constexpr size_t descriptor_header_size = 4 * sizeof(uint32_t);
+    static constexpr size_t descriptor_header_size = 5 * sizeof(uint32_t);
+    static constexpr uint32_t segment_magic = ('S'<<24) |('C'<< 16) | ('L' << 8) | 'C';

    // The commit log (chained) sync marker/header size in bytes (int: length + int: checksum [segmentId, position])
    static constexpr size_t sync_marker_size = 2 * sizeof(uint32_t);
@@ -368,6 +407,7 @@ public:
    void reset_sync_time() {
        _sync_time = clock_type::now();
    }
+    // See class comment for info
    future<sseg_ptr> sync() {
        // Note: this is not a marker for when sync was finished.
        // It is when it was initiated
@@ -384,6 +424,7 @@ public:
    future<> shutdown() {
        return _gate.close();
    }
+    // See class comment for info
    future<sseg_ptr> flush(uint64_t pos = 0) {
        auto me = shared_from_this();
        assert(!me.owned());
@@ -429,6 +470,7 @@ public:
    /**
     * Send any buffer contents to disk and get a new tmp buffer
     */
+    // See class comment for info
    future<sseg_ptr> cycle(size_t s = 0) {
        auto size = clear_buffer_slack();
        auto buf = std::move(_buffer);
@@ -483,6 +525,7 @@ public:

        if (off == 0) {
            // first block. write file header.
+            out.write(segment_magic);
            out.write(_desc.ver);
            out.write(_desc.id);
            crc32_nbo crc;
@@ -1094,7 +1137,7 @@ db::commitlog::commitlog(config cfg)
        : _segment_manager(new segment_manager(std::move(cfg))) {
 }

-db::commitlog::commitlog(commitlog&& v)
+db::commitlog::commitlog(commitlog&& v) noexcept
        : _segment_manager(std::move(v._segment_manager)) {
 }

@@ -1170,10 +1213,11 @@ const db::commitlog::config& db::commitlog::active_config() const {
    return _segment_manager->cfg;
 }

-future<subscription<temporary_buffer<char>, db::replay_position>>
+future<std::unique_ptr<subscription<temporary_buffer<char>, db::replay_position>>>
 db::commitlog::read_log_file(const sstring& filename, commit_load_reader_func next, position_type off) {
    return engine().open_file_dma(filename, open_flags::ro).then([next = std::move(next), off](file f) {
-       return read_log_file(std::move(f), std::move(next), off);
+       return std::make_unique<subscription<temporary_buffer<char>, replay_position>>(
+           read_log_file(std::move(f), std::move(next), off));
    });
 }

@@ -1189,6 +1233,8 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type
        size_t next = 0;
        size_t start_off = 0;
        size_t skip_to = 0;
+        size_t file_size = 0;
+        size_t corrupt_size = 0;
        bool eof = false;
        bool header = true;

@@ -1232,16 +1278,20 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type
                }
                // Will throw if we got eof
                data_input in(buf);
+                auto magic = in.read<uint32_t>();
                auto ver = in.read<uint32_t>();
                auto id = in.read<uint64_t>();
                auto checksum = in.read<uint32_t>();

-                if (ver == 0 && id == 0 && checksum == 0) {
+                if (magic == 0 && ver == 0 && id == 0 && checksum == 0) {
                    // let's assume this was an empty (pre-allocated)
                    // file. just skip it.
                    return stop();
                }

+                if (magic != segment::segment_magic) {
+                    throw std::invalid_argument("Not a scylla format commitlog file");
+                }
                crc32_nbo crc;
                crc.process(ver);
                crc.process<int32_t>(id & 0xffffffff);
@@ -1282,7 +1332,11 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type

                auto cs = crc.checksum();
                if (cs != checksum) {
-                    throw std::runtime_error("Checksum error in chunk header");
+                    // if a chunk header checksum is broken, we shall just assume that all
+                    // remaining is as well. We cannot trust the "next" pointer, so...
+                    logger.debug("Checksum error in segment chunk at {}.", pos);
+                    corrupt_size += (file_size - pos);
+                    return stop();
                }

                this->next = next;
@@ -1308,21 +1362,24 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type
                auto size = in.read<uint32_t>();
                auto checksum = in.read<uint32_t>();

-                if (size == 0) {
-                    // special scylla case: zero padding due to dma blocks
-                    auto slack = next - pos;
-                    return skip(slack);
-                }
+                crc32_nbo crc;
+                crc.process(size);

-                if (size < 3 * sizeof(uint32_t)) {
-                    throw std::runtime_error("Invalid entry size");
+                if (size < 3 * sizeof(uint32_t) || checksum != crc.checksum()) {
+                    auto slack = next - pos;
+                    if (size != 0) {
+                        logger.debug("Segment entry at {} has broken header. Skipping to next chunk ({} bytes)", rp, slack);
+                        corrupt_size += slack;
+                    }
+                    // size == 0 -> special scylla case: zero padding due to dma blocks
+                    return skip(slack);
                }

                if (start_off > pos) {
                    return skip(size - entry_header_size);
                }

-                return fin.read_exactly(size - entry_header_size).then([this, size, checksum, rp](temporary_buffer<char> buf) {
+                return fin.read_exactly(size - entry_header_size).then([this, size, crc = std::move(crc), rp](temporary_buffer<char> buf) mutable {
                    advance(buf);

                    data_input in(buf);
@@ -1331,12 +1388,15 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type
                    in.skip(data_size);
                    auto checksum = in.read<uint32_t>();

-                    crc32_nbo crc;
-                    crc.process(size);
                    crc.process_bytes(buf.get(), data_size);

                    if (crc.checksum() != checksum) {
-                        throw std::runtime_error("Checksum error in data entry");
+                        // If we're getting a checksum error here, most likely the rest of
+                        // the file will be corrupt as well. But it does not hurt to retry.
+                        // Just go to the next entry (since "size" in header seemed ok).
+                        logger.debug("Segment entry at {} checksum error. Skipping {} bytes", rp, size);
+                        corrupt_size += size;
+                        return make_ready_future<>();
                    }

                    return s.produce(buf.share(0, data_size), rp);
@@ -1344,10 +1404,18 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type
            });
        }
        future<> read_file() {
-            return read_header().then(
-                    [this] {
-                        return do_until(std::bind(&work::end_of_file, this), std::bind(&work::read_chunk, this));
-                    });
+            return f.size().then([this](uint64_t size) {
+                file_size = size;
+            }).then([this] {
+                return read_header().then(
+                        [this] {
+                            return do_until(std::bind(&work::end_of_file, this), std::bind(&work::read_chunk, this));
+                }).then([this] {
+                  if (corrupt_size > 0) {
+                      throw segment_data_corruption_error("Data corruption", corrupt_size);
+                  }
+                });
+            });
        }
    };

@@ -1375,6 +1443,10 @@ uint64_t db::commitlog::get_completed_tasks() const {
    return _segment_manager->totals.allocation_count;
 }

+uint64_t db::commitlog::get_flush_count() const {
+    return _segment_manager->totals.flush_count;
+}
+
 uint64_t db::commitlog::get_pending_tasks() const {
    return _segment_manager->totals.pending_operations;
 }
--- a/db/commitlog/commitlog.hh
+++ b/db/commitlog/commitlog.hh
@@ -139,7 +139,7 @@ public:
        const uint32_t ver;
    };

-    commitlog(commitlog&&);
+    commitlog(commitlog&&) noexcept;
    ~commitlog();

    /**
@@ -231,6 +231,7 @@ public:

    uint64_t get_total_size() const;
    uint64_t get_completed_tasks() const;
+    uint64_t get_flush_count() const;
    uint64_t get_pending_tasks() const;
    uint64_t get_num_segments_created() const;
    uint64_t get_num_segments_destroyed() const;
@@ -265,8 +266,21 @@ public:

    typedef std::function<future<>(temporary_buffer<char>, replay_position)> commit_load_reader_func;

+    class segment_data_corruption_error: public std::runtime_error {
+    public:
+        segment_data_corruption_error(std::string msg, uint64_t s)
+                : std::runtime_error(msg), _bytes(s) {
+        }
+        uint64_t bytes() const {
+            return _bytes;
+        }
+    private:
+        uint64_t _bytes;
+    };
+
    static subscription<temporary_buffer<char>, replay_position> read_log_file(file, commit_load_reader_func, position_type = 0);
-    static future<subscription<temporary_buffer<char>, replay_position>> read_log_file(const sstring&, commit_load_reader_func, position_type = 0);
+    static future<std::unique_ptr<subscription<temporary_buffer<char>, replay_position>>> read_log_file(
+            const sstring&, commit_load_reader_func, position_type = 0);
 private:
    commitlog(config);
 };
--- a/db/commitlog/commitlog_replayer.cc
+++ b/db/commitlog/commitlog_replayer.cc
@@ -69,6 +69,7 @@ public:
        uint64_t invalid_mutations = 0;
        uint64_t skipped_mutations = 0;
        uint64_t applied_mutations = 0;
+        uint64_t corrupt_bytes = 0;
    };

    future<> process(stats*, temporary_buffer<char> buf, replay_position rp);
@@ -166,9 +167,16 @@ db::commitlog_replayer::impl::recover(sstring file) {
    return db::commitlog::read_log_file(file,
            std::bind(&impl::process, this, s.get(), std::placeholders::_1,
                    std::placeholders::_2), p).then([](auto s) {
-        auto f = s.done();
+        auto f = s->done();
        return f.finally([s = std::move(s)] {});
-    }).then([s] {
+    }).then_wrapped([s](future<> f) {
+        try {
+            f.get();
+        } catch (commitlog::segment_data_corruption_error& e) {
+            s->corrupt_bytes += e.bytes();
+        } catch (...) {
+            throw;
+        }
        return make_ready_future<stats>(*s);
    });
 }
@@ -233,7 +241,7 @@ db::commitlog_replayer::commitlog_replayer(seastar::sharded<cql3::query_processo
    : _impl(std::make_unique<impl>(qp))
 {}

-db::commitlog_replayer::commitlog_replayer(commitlog_replayer&& r)
+db::commitlog_replayer::commitlog_replayer(commitlog_replayer&& r) noexcept
    : _impl(std::move(r._impl))
 {}

@@ -250,24 +258,32 @@ future<db::commitlog_replayer> db::commitlog_replayer::create_replayer(seastar::
 }

 future<> db::commitlog_replayer::recover(std::vector<sstring> files) {
-    logger.info("Replaying {}", files);
-
    return parallel_for_each(files, [this](auto f) {
-        return this->recover(f).handle_exception([f](auto ep) {
-            logger.error("Error recovering {}: {}", f, ep);
-            std::rethrow_exception(ep);
-        });
+        return this->recover(f);
    });
 }

-future<> db::commitlog_replayer::recover(sstring file) {
-    return _impl->recover(file).then([file](impl::stats stats) {
+future<> db::commitlog_replayer::recover(sstring f) { // replays one commitlog file and logs the outcome
+    return _impl->recover(f).then([f](impl::stats stats) {
+        if (stats.corrupt_bytes != 0) { // the reader skipped corrupt data but the replay still completed
+            logger.warn("Corrupted file: {}. {} bytes skipped.", f, stats.corrupt_bytes);
+        }
        logger.info("Log replay of {} complete, {} replayed mutations ({} invalid, {} skipped)"
-                , file
+                , f
                , stats.applied_mutations
                , stats.invalid_mutations
                , stats.skipped_mutations
                );
-    });
+    }).handle_exception([f](auto ep) { // log the failure, then propagate the original exception
+        logger.error("Error recovering {}: {}", f, ep);
+        try {
+            std::rethrow_exception(ep);
+        } catch (std::invalid_argument&) { // e.g. thrown by read_log_file on a segment magic mismatch
+            logger.error("Scylla cannot process {}. Make sure to fully flush all Cassandra commit log files to sstable before migrating.", f);
+            throw;
+        } catch (...) {
+            throw;
+        }
+    });
 }

--- a/db/commitlog/commitlog_replayer.hh
+++ b/db/commitlog/commitlog_replayer.hh
@@ -57,7 +57,7 @@ class commitlog;

 class commitlog_replayer {
 public:
-    commitlog_replayer(commitlog_replayer&&);
+    commitlog_replayer(commitlog_replayer&&) noexcept;
    ~commitlog_replayer();

    static future<commitlog_replayer> create_replayer(seastar::sharded<cql3::query_processor>&);
--- a/db/config.cc
+++ b/db/config.cc
@@ -31,6 +31,7 @@
 #include "core/fstream.hh"
 #include "core/do_with.hh"
 #include "log.hh"
+#include <boost/any.hpp>

 static logging::logger logger("config");

@@ -116,8 +117,9 @@ template<typename K, typename V>
 struct convert<std::unordered_map<K, V>> {
    static Node encode(const std::unordered_map<K, V>& rhs) {
        Node node(NodeType::Map);
-        for(typename std::map<K, V>::const_iterator it=rhs.begin();it!=rhs.end();++it)
-            node.force_insert(it->first, it->second);
+        for (auto& p : rhs) {
+            node.force_insert(p.first, p.second);
+        }
        return node;
    }
    static bool decode(const Node& node, std::unordered_map<K, V>& rhs) {
@@ -412,3 +414,21 @@ future<> db::config::read_from_file(const sstring& filename) {
       return read_from_file(std::move(f));
    });
 }
+
+boost::filesystem::path db::config::get_conf_dir() {
+    using namespace boost::filesystem;
+
+    path confdir;
+    auto* cd = std::getenv("SCYLLA_CONF");
+    if (cd != nullptr) {
+        confdir = path(cd);
+    } else {
+        auto* p = std::getenv("SCYLLA_HOME");
+        if (p != nullptr) {
+            confdir = path(p);
+        }
+        confdir /= "conf";
+    }
+
+    return confdir;
+}
--- a/db/config.hh
+++ b/db/config.hh
@@ -121,23 +121,7 @@ public:
     * @return path of the directory where configuration files are located
     *         according the environment variables definitions.
     */
-    static boost::filesystem::path get_conf_dir() {
-        using namespace boost::filesystem;
-
-        path confdir;
-        auto* cd = std::getenv("SCYLLA_CONF");
-        if (cd != nullptr) {
-            confdir = path(cd);
-        } else {
-            auto* p = std::getenv("SCYLLA_HOME");
-            if (p != nullptr) {
-                confdir = path(p);
-            }
-            confdir /= "conf";
-        }
-
-        return confdir;
-    }
+    static boost::filesystem::path get_conf_dir();

    typedef std::unordered_map<sstring, sstring> string_map;
    typedef std::vector<sstring> string_list;
@@ -290,7 +274,7 @@ public:
            "Related information: Configuring compaction"   \
    )                                                   \
    /* Common fault detection setting */    \
-    val(phi_convict_threshold, uint32_t, 8, Unused,     \
+    val(phi_convict_threshold, uint32_t, 8, Used,     \
            "Adjusts the sensitivity of the failure detector on an exponential scale. Generally this setting never needs adjusting.\n"  \
            "Related information: Failure detection and recovery"  \
    )                                                   \
@@ -316,7 +300,7 @@ public:
    val(commitlog_sync_batch_window_in_ms, uint32_t, 10000, Used,     \
            "Controls how long the system waits for other writes before performing a sync in \"batch\" mode."    \
    )   \
-    val(commitlog_total_space_in_mb, uint32_t, 8192, Used,     \
+    val(commitlog_total_space_in_mb, int64_t, -1, Used,     \
            "Total space used for commitlogs. If the used space goes above this value, Cassandra rounds up to the next nearest segment multiple and flushes memtables to disk for the oldest commitlog segments, removing those log segments. This reduces the amount of data to replay on startup, and prevents infrequently-updated tables from indefinitely keeping commitlog segments. A small total commitlog space tends to cause more flush activity on less-active tables.\n"  \
            "Related information: Configuring memtable throughput"  \
    )                                                   \
@@ -402,11 +386,11 @@ public:
    val(batch_size_warn_threshold_in_kb, uint32_t, 5, Unused,     \
            "Log WARN on any batch size exceeding this value in kilobytes. Caution should be taken on increasing the size of this threshold as it can lead to node instability."  \
    )   \
-    val(broadcast_address, sstring, /* listen_address */, Unused, \
+    val(broadcast_address, sstring, /* listen_address */, Used, \
            "The IP address a node tells other nodes in the cluster to contact it by. It allows public and private address to be different. For example, use the broadcast_address parameter in topologies where not all nodes have access to other nodes by their private IP addresses.\n" \
            "If your Cassandra cluster is deployed across multiple Amazon EC2 regions and you use the EC2MultiRegionSnitch , set the broadcast_address to public IP address of the node and the listen_address to the private IP."    \
    )   \
-    val(initial_token, sstring, /* N/A */, Unused,     \
+    val(initial_token, sstring, /* N/A */, Used,     \
            "Used in the single-node-per-token architecture, where a node owns exactly one contiguous range in the ring space. Setting this property overrides num_tokens.\n"   \
            "If you not using vnodes or have num_tokens set it to 1 or unspecified (#num_tokens), you should always specify this parameter when setting up a production cluster for the first time and when adding capacity. For more information, see this parameter in the Cassandra 1.1 Node and Cluster Configuration documentation.\n" \
            "This parameter can be used with num_tokens (vnodes ) in special cases such as Restoring from a snapshot." \
@@ -430,7 +414,7 @@ public:
            , "org.apache.cassandra.dht.ByteOrderedPartitioner" \
            , "org.apache.cassandra.dht.OrderPreservingPartitioner" \
    )                                                   \
-    val(storage_port, uint16_t, 7000, Unused,                \
+    val(storage_port, uint16_t, 7000, Used,                \
            "The port for inter-node communication."  \
    )                                                   \
    /* Advanced automatic backup setting */ \
@@ -560,7 +544,7 @@ public:
    )   \
    /* RPC (remote procedure call) settings */  \
    /* Settings for configuring and tuning client connections. */   \
-    val(broadcast_rpc_address, sstring, /* unset */, Unused,    \
+    val(broadcast_rpc_address, sstring, /* unset */, Used,    \
            "RPC address to broadcast to drivers and other Cassandra nodes. This cannot be set to 0.0.0.0. If blank, it is set to the value of the rpc_address or rpc_interface. If rpc_address or rpc_interfaceis set to 0.0.0.0, this property must be set.\n"    \
    )   \
    val(rpc_port, uint16_t, 9160, Used,                \
@@ -682,7 +666,7 @@ public:
    val(permissions_update_interval_in_ms, uint32_t, 2000, Unused,     \
            "Refresh interval for permissions cache (if enabled). After this interval, cache entries become eligible for refresh. On next access, an async reload is scheduled and the old value is returned until it completes. If permissions_validity_in_ms , then this property must benon-zero."   \
    )   \
-    val(server_encryption_options, string_map, /*none*/, Unused,     \
+    val(server_encryption_options, string_map, /*none*/, Used,     \
            "Enable or disable inter-node encryption. You must also generate keys and provide the appropriate key and trust store locations and passwords. No custom encryption options are currently enabled. The available options are:\n"    \
            "\n"    \
            "internode_encryption : (Default: none ) Enable or disable encryption of inter-node communication using the TLS_RSA_WITH_AES_128_CBC_SHA cipher suite for authentication, key exchange, and encryption of data transfers. The available inter-node options are:\n"  \
@@ -690,20 +674,9 @@ public:
            "\tnone : No encryption.\n" \
            "\tdc : Encrypt the traffic between the data centers (server only).\n"  \
            "\track : Encrypt the traffic between the racks(server only).\n"    \
-            "\tkeystore : (Default: conf/.keystore ) The location of a Java keystore (JKS) suitable for use with Java Secure Socket Extension (JSSE), which is the Java version of the Secure Sockets Layer (SSL), and Transport Layer Security (TLS) protocols. The keystore contains the private key used to encrypt outgoing messages.\n"    \
-            "\tkeystore_password : (Default: cassandra ) Password for the keystore.\n"  \
-            "\ttruststore : (Default: conf/.truststore ) Location of the truststore containing the trusted certificate for authenticating remote servers.\n"    \
-            "\ttruststore_password : (Default: cassandra ) Password for the truststore.\n"  \
-            "\n"    \
-            "The passwords used in these options must match the passwords used when generating the keystore and truststore. For instructions on generating these files, see Creating a Keystore to Use with JSSE.\n"   \
-            "\n"    \
-            "The advanced settings are:\n"  \
-            "\n"    \
-            "\tprotocol : (Default: TLS )\n"    \
-            "\talgorithm : (Default: SunX509 )\n"   \
-            "\tstore_type : (Default: JKS )\n"  \
-            "\tcipher_suites : (Default: TLS_RSA_WITH_AES_128_CBC_SHA , TLS_RSA_WITH_AES_256_CBC_SHA )\n"   \
-            "\trequire_client_auth : (Default: false ) Enables or disables certificate authentication.\n" \
+            "certificate : (Default: conf/scylla.crt) The location of a PEM-encoded x509 certificate used to identify and encrypt the internode communication.\n"    \
+            "keyfile : (Default: conf/scylla.key) PEM Key file associated with certificate.\n"  \
+            "truststore : (Default: <system truststore> ) Location of the truststore containing the trusted certificate for authenticating remote servers.\n"    \
            "Related information: Node-to-node encryption"  \
    )   \
    val(client_encryption_options, string_map, /*none*/, Unused,     \
@@ -743,6 +716,16 @@ public:
    val(api_ui_dir, sstring, "swagger-ui/dist/", Used, "The directory location of the API GUI") \
    val(api_doc_dir, sstring, "api/api-doc/", Used, "The API definition file directory") \
    val(load_balance, sstring, "none", Used, "CQL request load balancing: 'none' or round-robin'") \
+    val(consistent_rangemovement, bool, true, Used, "When set to true, range movements will be consistent. It means: 1) it will refuse to bootstrap a new node if other bootstrapping/leaving/moving nodes are detected. 2) data will be streamed to a new node only from the node which is no longer responsible for the token range. Same as -Dcassandra.consistent.rangemovement in cassandra") \
+    val(join_ring, bool, true, Used, "When set to true, a node will join the token ring. When set to false, a node will not join the token ring. User can use nodetool join to initiate ring joining later. Same as -Dcassandra.join_ring in cassandra.") \
+    val(load_ring_state, bool, true, Used, "When set to true, load tokens and host_ids previously saved. Same as -Dcassandra.load_ring_state in cassandra.") \
+    val(replace_node, sstring, "", Used, "The UUID of the node to replace. Same as -Dcassandra.replace_node in cassandra.") \
+    val(replace_token, sstring, "", Used, "The tokens of the node to replace. Same as -Dcassandra.replace_token in cassandra.") \
+    val(replace_address, sstring, "", Used, "The listen_address or broadcast_address of the dead node to replace. Same as -Dcassandra.replace_address.") \
+    val(replace_address_first_boot, sstring, "", Used, "Like replace_address option, but if the node has been bootstrapped successfully it will be ignored. Same as -Dcassandra.replace_address_first_boot.") \
+    val(override_decommission, bool, false, Used, "Set true to force a decommissioned node to join the cluster") \
+    val(ring_delay_ms, uint32_t, 30 * 1000, Used, "Time a node waits to hear from other nodes before joining the ring in milliseconds. Same as -Dcassandra.ring_delay_ms in cassandra.") \
+    val(developer_mode, bool, false, Used, "Relax environment checks. Setting to true can reduce performance and reliability significantly.") \
    /* done! */

 #define _make_value_member(name, type, deflt, status, desc, ...)    \
--- a/db/query_context.hh
+++ b/db/query_context.hh
@@ -42,7 +42,7 @@ struct query_context {
    future<::shared_ptr<cql3::untyped_result_set>> execute_cql(sstring text, sstring cf, Args&&... args) {
        // FIXME: Would be better not to use sprint here.
        sstring req = sprint(text, cf);
-        return this->_qp.local().execute_internal(req, { boost::any(std::forward<Args>(args))... });
+        return this->_qp.local().execute_internal(req, { data_value(std::forward<Args>(args))... });
    }
    database& db() {
        return _db.local();
@@ -67,9 +67,8 @@ extern std::unique_ptr<query_context> qctx;
 // we executed the query, and return an empty result
 template <typename... Args>
 static future<::shared_ptr<cql3::untyped_result_set>> execute_cql(sstring text, Args&&... args) {
-    if (qctx) {
-        return qctx->execute_cql(text, std::forward<Args>(args)...);
-    }
-    return make_ready_future<shared_ptr<cql3::untyped_result_set>>(::make_shared<cql3::untyped_result_set>(cql3::untyped_result_set::make_empty()));
+    assert(qctx);
+    return qctx->execute_cql(text, std::forward<Args>(args)...);
 }
+
 }
--- a/db/schema_tables.cc
+++ b/db/schema_tables.cc
@@ -329,7 +329,7 @@ future<utils::UUID> calculate_schema_digest(distributed<service::storage_proxy>&
            std::vector<query::result> results;
            for (auto&& p : rs->partitions()) {
                auto mut = p.mut().unfreeze(s);
-                auto partition_key = boost::any_cast<sstring>(utf8_type->deserialize(mut.key().get_component(*s, 0)));
+                auto partition_key = value_cast<sstring>(utf8_type->deserialize(mut.key().get_component(*s, 0)));
                if (partition_key == system_keyspace::NAME) {
                    continue;
                }
@@ -368,7 +368,7 @@ future<std::vector<frozen_mutation>> convert_schema_to_mutations(distributed<ser
            std::vector<frozen_mutation> results;
            for (auto&& p : rs->partitions()) {
                auto mut = p.mut().unfreeze(s);
-                auto partition_key = boost::any_cast<sstring>(utf8_type->deserialize(mut.key().get_component(*s, 0)));
+                auto partition_key = value_cast<sstring>(utf8_type->deserialize(mut.key().get_component(*s, 0)));
                if (partition_key == system_keyspace::NAME) {
                    continue;
                }
@@ -398,18 +398,18 @@ read_schema_for_keyspaces(distributed<service::storage_proxy>& proxy, const sstr
    return map_reduce(keyspace_names.begin(), keyspace_names.end(), map, schema_result{}, insert);
 }

-future<schema_result::value_type>
+future<schema_result_value_type>
 read_schema_partition_for_keyspace(distributed<service::storage_proxy>& proxy, const sstring& schema_table_name, const sstring& keyspace_name)
 {
    auto schema = proxy.local().get_db().local().find_schema(system_keyspace::NAME, schema_table_name);
    auto keyspace_key = dht::global_partitioner().decorate_key(*schema,
        partition_key::from_singular(*schema, keyspace_name));
    return db::system_keyspace::query(proxy, schema_table_name, keyspace_key).then([keyspace_name] (auto&& rs) {
-        return schema_result::value_type{keyspace_name, std::move(rs)};
+        return schema_result_value_type{keyspace_name, std::move(rs)};
    });
 }

-future<schema_result::value_type>
+future<schema_result_value_type>
 read_schema_partition_for_table(distributed<service::storage_proxy>& proxy, const sstring& schema_table_name, const sstring& keyspace_name, const sstring& table_name)
 {
    auto schema = proxy.local().get_db().local().find_schema(system_keyspace::NAME, schema_table_name);
@@ -417,7 +417,7 @@ read_schema_partition_for_table(distributed<service::storage_proxy>& proxy, cons
        partition_key::from_singular(*schema, keyspace_name));
    auto clustering_range = query::clustering_range(clustering_key_prefix::from_clustering_prefix(*schema, exploded_clustering_prefix({utf8_type->decompose(table_name)})));
    return db::system_keyspace::query(proxy, schema_table_name, keyspace_key, clustering_range).then([keyspace_name] (auto&& rs) {
-        return schema_result::value_type{keyspace_name, std::move(rs)};
+        return schema_result_value_type{keyspace_name, std::move(rs)};
    });
 }

@@ -468,7 +468,7 @@ future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std::vector
       std::set<sstring> keyspaces;
       std::set<utils::UUID> column_families;
       for (auto&& mutation : mutations) {
-           keyspaces.emplace(boost::any_cast<sstring>(utf8_type->deserialize(mutation.key().get_component(*s, 0))));
+           keyspaces.emplace(value_cast<sstring>(utf8_type->deserialize(mutation.key().get_component(*s, 0))));
           column_families.emplace(mutation.column_family_id());
       }

@@ -528,7 +528,7 @@ future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std::vector

 future<std::set<sstring>> merge_keyspaces(distributed<service::storage_proxy>& proxy, schema_result&& before, schema_result&& after)
 {
-    std::vector<schema_result::value_type> created;
+    std::vector<schema_result_value_type> created;
    std::vector<sstring> altered;
    std::set<sstring> dropped;

@@ -552,7 +552,7 @@ future<std::set<sstring>> merge_keyspaces(distributed<service::storage_proxy>& p
    for (auto&& key : diff.entries_only_on_right) {
        auto&& value = after[key];
        if (!value->empty()) {
-            created.emplace_back(schema_result::value_type{key, std::move(value)});
+            created.emplace_back(schema_result_value_type{key, std::move(value)});
        }
    }
    for (auto&& key : diff.entries_differing) {
@@ -566,7 +566,7 @@ future<std::set<sstring>> merge_keyspaces(distributed<service::storage_proxy>& p
        } else if (!pre->empty()) {
            dropped.emplace(keyspace_name);
        } else if (!post->empty()) { // a (re)created keyspace
-            created.emplace_back(schema_result::value_type{key, std::move(post)});
+            created.emplace_back(schema_result_value_type{key, std::move(post)});
        }
    }
    return do_with(std::move(created), [&proxy, altered = std::move(altered)] (auto& created) {
@@ -899,7 +899,7 @@ std::vector<mutation> make_drop_keyspace_mutations(lw_shared_ptr<keyspace_metada
 *
 * @param partition Keyspace attributes in serialized form
 */
-lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result::value_type& result)
+lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result_value_type& result)
 {
    auto&& rs = result.second;
    if (rs->empty()) {
@@ -1269,7 +1269,7 @@ void create_table_from_table_row_and_column_rows(schema_builder& builder, const
    } else {
        // FIXME:
        // is_dense = CFMetaData.calculateIsDense(fullRawComparator, columnDefs);
-        throw std::runtime_error("not implemented");
+        throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
    }

    bool is_compound = cell_comparator::check_compound(table_row.get_nonnull<sstring>("comparator"));
@@ -1310,10 +1310,10 @@ void create_table_from_table_row_and_column_rows(schema_builder& builder, const
        builder.set_max_compaction_threshold(table_row.get_nonnull<int>("max_compaction_threshold"));
    }

-#if 0
-    if (result.has("comment"))
-        cfm.comment(result.getString("comment"));
-#endif
+    if (table_row.has("comment")) {
+        builder.set_comment(table_row.get_nonnull<sstring>("comment"));
+    }
+
    if (table_row.has("memtable_flush_period_in_ms")) {
        builder.set_memtable_flush_period(table_row.get_nonnull<int32_t>("memtable_flush_period_in_ms"));
    }
--- a/db/schema_tables.hh
+++ b/db/schema_tables.hh
@@ -55,6 +55,7 @@ namespace db {
 namespace schema_tables {

 using schema_result = std::map<sstring, lw_shared_ptr<query::result_set>>;
+using schema_result_value_type = std::pair<sstring, lw_shared_ptr<query::result_set>>;

 static constexpr auto KEYSPACES = "schema_keyspaces";
 static constexpr auto COLUMNFAMILIES = "schema_columnfamilies";
@@ -74,7 +75,7 @@ future<utils::UUID> calculate_schema_digest(distributed<service::storage_proxy>&

 future<std::vector<frozen_mutation>> convert_schema_to_mutations(distributed<service::storage_proxy>& proxy);

-future<schema_result::value_type>
+future<schema_result_value_type>
 read_schema_partition_for_keyspace(distributed<service::storage_proxy>& proxy, const sstring& schema_table_name, const sstring& keyspace_name);

 future<> merge_schema(distributed<service::storage_proxy>& proxy, std::vector<mutation> mutations);
@@ -89,11 +90,11 @@ std::vector<mutation> make_create_keyspace_mutations(lw_shared_ptr<keyspace_meta

 std::vector<mutation> make_drop_keyspace_mutations(lw_shared_ptr<keyspace_metadata> keyspace, api::timestamp_type timestamp);

-lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result::value_type& partition);
+lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result_value_type& partition);

 future<> merge_tables(distributed<service::storage_proxy>& proxy, schema_result&& before, schema_result&& after);

-lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result::value_type& partition);
+lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result_value_type& partition);

 mutation make_create_keyspace_mutation(lw_shared_ptr<keyspace_metadata> keyspace, api::timestamp_type timestamp, bool with_tables_and_types_and_functions = true);

--- a/db/serializer.cc
+++ b/db/serializer.cc
@@ -143,18 +143,18 @@ atomic_cell_view db::serializer<atomic_cell_view>::read(input& in) {
 }

 template<>
-db::serializer<collection_mutation::view>::serializer(const collection_mutation::view& c)
+db::serializer<collection_mutation_view>::serializer(const collection_mutation_view& c)
        : _item(c), _size(bytes_view_serializer(c.serialize()).size()) {
 }

 template<>
-void db::serializer<collection_mutation::view>::write(output& out, const collection_mutation::view& t) {
+void db::serializer<collection_mutation_view>::write(output& out, const collection_mutation_view& t) {
    bytes_view_serializer::write(out, t.serialize());
 }

 template<>
-void db::serializer<collection_mutation::view>::read(collection_mutation::view& c, input& in) {
-    c = collection_mutation::view::from_bytes(bytes_view_serializer::read(in));
+void db::serializer<collection_mutation_view>::read(collection_mutation_view& c, input& in) {
+    c = collection_mutation_view::from_bytes(bytes_view_serializer::read(in));
 }

 template<>
@@ -187,30 +187,6 @@ void db::serializer<partition_key_view>::skip(input& in) {
    in.skip(len);
 }

-template<>
-db::serializer<clustering_key_view>::serializer(const clustering_key_view& key)
-    : _item(key), _size(sizeof(uint16_t) /* size */ + key.representation().size()) {
-}
-
-template<>
-void db::serializer<clustering_key_view>::write(output& out, const clustering_key_view& key) {
-    bytes_view v = key.representation();
-    out.write<uint16_t>(v.size());
-    out.write(v.begin(), v.end());
-}
-
-template<>
-void db::serializer<clustering_key_view>::read(clustering_key_view& b, input& in) {
-    auto len = in.read<uint16_t>();
-    b = clustering_key_view::from_bytes(in.read_view(len));
-}
-
-template<>
-clustering_key_view db::serializer<clustering_key_view>::read(input& in) {
-    auto len = in.read<uint16_t>();
-    return clustering_key_view::from_bytes(in.read_view(len));
-}
-
 template<>
 db::serializer<clustering_key_prefix_view>::serializer(const clustering_key_prefix_view& key)
    : _item(key), _size(sizeof(uint16_t) /* size */ + key.representation().size()) {
@@ -278,10 +254,9 @@ template class db::serializer<bytes> ;
 template class db::serializer<bytes_view> ;
 template class db::serializer<sstring> ;
 template class db::serializer<atomic_cell_view> ;
-template class db::serializer<collection_mutation::view> ;
+template class db::serializer<collection_mutation_view> ;
 template class db::serializer<utils::UUID> ;
 template class db::serializer<partition_key_view> ;
-template class db::serializer<clustering_key_view> ;
 template class db::serializer<clustering_key_prefix_view> ;
 template class db::serializer<frozen_mutation> ;
 template class db::serializer<db::replay_position> ;
--- a/db/serializer.hh
+++ b/db/serializer.hh
@@ -22,11 +22,12 @@
 #ifndef DB_SERIALIZER_HH_
 #define DB_SERIALIZER_HH_

+#include <experimental/optional>
+
 #include "utils/data_input.hh"
 #include "utils/data_output.hh"
 #include "bytes_ostream.hh"
 #include "bytes.hh"
-#include "mutation.hh"
 #include "keys.hh"
 #include "database_fwd.hh"
 #include "frozen_mutation.hh"
@@ -58,9 +59,9 @@ public:
        return *this;
    }

-    static void write(output&, const T&);
-    static void read(T&, input&);
-    static T read(input&);
+    static void write(output&, const type&);
+    static void read(type&, input&);
+    static type read(input&);
    static void skip(input& in);

    size_t size() const {
@@ -76,11 +77,100 @@ public:
    void write(data_output& out) const {
        write(out, _item);
    }
+
+    bytes to_bytes() const { // Encode the wrapped item into a new buffer of _size bytes.
+        bytes b(bytes::initialized_later(), _size);
+        data_output out(b);
+        write(out);
+        return b;
+    }
+
+    static type from_bytes(bytes_view v) { // Decode one value from v using read().
+        data_input in(v);
+        return read(in);
+    }
 private:
-    const T& _item;
+    const type& _item;
    size_t _size;
 };

+// Serializer for optional values. Wire format: a bool "engaged" flag,
+// followed by serializer<T>'s encoding of the value when the flag is true.
+template<typename T>
+class serializer<std::experimental::optional<T>> {
+public:
+    typedef std::experimental::optional<T> type;
+    typedef data_output output;
+    typedef data_input input;
+    typedef serializer<type> _MyType; // this specialization: operator() returns *this
+
+    serializer(const type& t)
+        : _item(t)
+        , _size(output::serialized_size<bool>() + (t ? serializer<T>(*t).size() : 0))
+    {}
+
+    // apply to memory, must be at least size() large.
+    const _MyType& operator()(output& out) const {
+        write(out, _item);
+        return *this;
+    }
+
+    // Writes the engaged flag, then the value if there is one.
+    static void write(output& out, const type& v) {
+        const bool en = static_cast<bool>(v); // optional's operator bool is explicit
+        out.write<bool>(en);
+        if (en) {
+            serializer<T>::write(out, *v);
+        }
+    }
+    // Reads into dst; dst is reset to empty when the stored flag is false.
+    static void read(type& dst, input& in) {
+        if (in.read<bool>()) {
+            dst = serializer<T>::read(in);
+        } else {
+            dst = {};
+        }
+    }
+    static type read(input& in) {
+        type t;
+        read(t, in);
+        return t;
+    }
+    // Consumes one encoded optional from in without building the value.
+    static void skip(input& in) {
+        if (in.read<bool>()) {
+            serializer<T>::skip(in);
+        }
+    }
+
+    size_t size() const { return _size; }
+
+    // Obtains a _size-byte placeholder from out and encodes the held item into it.
+    void write(bytes_ostream& out) const {
+        auto buf = out.write_place_holder(_size);
+        data_output data_out((char*)buf, _size);
+        write(data_out, _item);
+    }
+
+    void write(data_output& out) const { write(out, _item); }
+
+    bytes to_bytes() const {
+        bytes b(bytes::initialized_later(), _size);
+        data_output out(b);
+        write(out);
+        return b;
+    }
+
+    static type from_bytes(bytes_view v) {
+        data_input in(v);
+        return read(in);
+    }
+private:
+    const std::experimental::optional<T> _item;
+    size_t _size;
+};
+
+
 template<> serializer<utils::UUID>::serializer(const utils::UUID &);
 template<> void serializer<utils::UUID>::write(output&, const type&);
 template<> void serializer<utils::UUID>::read(utils::UUID&, input&);
@@ -109,9 +199,9 @@ template<> void serializer<atomic_cell_view>::write(output&, const type&);
 template<> void serializer<atomic_cell_view>::read(atomic_cell_view&, input&);
 template<> atomic_cell_view serializer<atomic_cell_view>::read(input&);

-template<> serializer<collection_mutation::view>::serializer(const collection_mutation::view &);
-template<> void serializer<collection_mutation::view>::write(output&, const type&);
-template<> void serializer<collection_mutation::view>::read(collection_mutation::view&, input&);
+template<> serializer<collection_mutation_view>::serializer(const collection_mutation_view &);
+template<> void serializer<collection_mutation_view>::write(output&, const type&);
+template<> void serializer<collection_mutation_view>::read(collection_mutation_view&, input&);

 template<> serializer<frozen_mutation>::serializer(const frozen_mutation &);
 template<> void serializer<frozen_mutation>::write(output&, const type&);
@@ -124,11 +214,6 @@ template<> void serializer<partition_key_view>::read(partition_key_view&, input&
 template<> partition_key_view serializer<partition_key_view>::read(input&);
 template<> void serializer<partition_key_view>::skip(input&);

-template<> serializer<clustering_key_view>::serializer(const clustering_key_view &);
-template<> void serializer<clustering_key_view>::write(output&, const clustering_key_view&);
-template<> void serializer<clustering_key_view>::read(clustering_key_view&, input&);
-template<> clustering_key_view serializer<clustering_key_view>::read(input&);
-
 template<> serializer<clustering_key_prefix_view>::serializer(const clustering_key_prefix_view &);
 template<> void serializer<clustering_key_prefix_view>::write(output&, const clustering_key_prefix_view&);
 template<> void serializer<clustering_key_prefix_view>::read(clustering_key_prefix_view&, input&);
@@ -160,7 +245,7 @@ typedef serializer<bytes> bytes_serializer; // Compatible with bytes_view_serial
 typedef serializer<bytes_view> bytes_view_serializer; // Compatible with bytes_serializer
 typedef serializer<sstring> sstring_serializer;
 typedef serializer<atomic_cell_view> atomic_cell_view_serializer;
-typedef serializer<collection_mutation::view> collection_mutation_view_serializer;
+typedef serializer<collection_mutation_view> collection_mutation_view_serializer;
 typedef serializer<utils::UUID> uuid_serializer;
 typedef serializer<partition_key_view> partition_key_view_serializer;
 typedef serializer<clustering_key_view> clustering_key_view_serializer;
--- a/db/system_keyspace.cc
+++ b/db/system_keyspace.cc
@@ -464,7 +464,8 @@ static future<> build_bootstrap_info() {
        static auto state_map = std::unordered_map<sstring, bootstrap_state>({
            { "NEEDS_BOOTSTRAP", bootstrap_state::NEEDS_BOOTSTRAP },
            { "COMPLETED", bootstrap_state::COMPLETED },
-            { "IN_PROGRESS", bootstrap_state::IN_PROGRESS }
+            { "IN_PROGRESS", bootstrap_state::IN_PROGRESS },
+            { "DECOMMISSIONED", bootstrap_state::DECOMMISSIONED }
        });
        bootstrap_state state = bootstrap_state::NEEDS_BOOTSTRAP;

@@ -486,9 +487,7 @@ future<> init_local_cache() {
 }

 void minimal_setup(distributed<database>& db, distributed<cql3::query_processor>& qp) {
-    auto new_ctx = std::make_unique<query_context>(db, qp);
-    qctx.swap(new_ctx);
-    assert(!new_ctx);
+    qctx = std::make_unique<query_context>(db, qp);
 }

 future<> setup(distributed<database>& db, distributed<cql3::query_processor>& qp) {
@@ -540,10 +539,11 @@ future<> save_truncation_records(const column_family& cf, db_clock::time_point t
    out.write<db_clock::rep>(truncated_at.time_since_epoch().count());

    map_type_impl::native_type tmp;
-    tmp.emplace_back(boost::any{ cf.schema()->id() }, boost::any{ buf });
+    tmp.emplace_back(cf.schema()->id(), data_value(buf));
+    auto map_type = map_type_impl::get_instance(uuid_type, bytes_type, true);

    sstring req = sprint("UPDATE system.%s SET truncated_at = truncated_at + ? WHERE key = '%s'", LOCAL, LOCAL);
-    return qctx->qp().execute_internal(req, {tmp}).then([](auto rs) {
+    return qctx->qp().execute_internal(req, {make_map_value(map_type, tmp)}).then([](auto rs) {
        truncation_records = {};
        return force_blocking_flush(LOCAL);
    });
@@ -633,7 +633,7 @@ future<db_clock::time_point> get_truncated_at(utils::UUID cf_id) {
 set_type_impl::native_type prepare_tokens(std::unordered_set<dht::token>& tokens) {
    set_type_impl::native_type tset;
    for (auto& t: tokens) {
-        tset.push_back(boost::any(dht::global_partitioner().to_sstring(t)));
+        tset.push_back(dht::global_partitioner().to_sstring(t));
    }
    return tset;
 }
@@ -641,7 +641,7 @@ set_type_impl::native_type prepare_tokens(std::unordered_set<dht::token>& tokens
 std::unordered_set<dht::token> decode_tokens(set_type_impl::native_type& tokens) {
    std::unordered_set<dht::token> tset;
    for (auto& t: tokens) {
-        auto str = boost::any_cast<sstring>(t);
+        auto str = value_cast<sstring>(t);
        assert(str == dht::global_partitioner().to_sstring(dht::global_partitioner().from_sstring(str)));
        tset.insert(dht::global_partitioner().from_sstring(str));
    }
@@ -658,7 +658,8 @@ future<> update_tokens(gms::inet_address ep, std::unordered_set<dht::token> toke
    }

    sstring req = "INSERT INTO system.%s (peer, tokens) VALUES (?, ?)";
-    return execute_cql(req, PEERS, ep.addr(), prepare_tokens(tokens)).discard_result().then([] {
+    auto set_type = set_type_impl::get_instance(utf8_type, true);
+    return execute_cql(req, PEERS, ep.addr(), make_set_value(set_type, prepare_tokens(tokens))).discard_result().then([] {
        return force_blocking_flush(PEERS);
    });
 }
@@ -689,7 +690,7 @@ future<std::unordered_map<gms::inet_address, std::unordered_set<dht::token>>> lo
                auto blob = row.get_blob("tokens");
                auto cdef = peers()->get_column_definition("tokens");
                auto deserialized = cdef->type->deserialize(blob);
-                auto tokens = boost::any_cast<set_type_impl::native_type>(deserialized);
+                auto tokens = value_cast<set_type_impl::native_type>(deserialized);

                ret->emplace(peer, decode_tokens(tokens));
            }
@@ -796,6 +797,8 @@ future<> remove_endpoint(gms::inet_address ep) {
    }).then([ep] {
        sstring req = "DELETE FROM system.%s WHERE peer = ?";
        return execute_cql(req, PEERS, ep.addr()).discard_result();
+    }).then([] {
+        return force_blocking_flush(PEERS);
    });
 }

@@ -808,16 +811,14 @@ future<> update_tokens(std::unordered_set<dht::token> tokens) {
    }

    sstring req = "INSERT INTO system.%s (key, tokens) VALUES (?, ?)";
-    return execute_cql(req, LOCAL, sstring(LOCAL), prepare_tokens(tokens)).discard_result().then([] {
+    auto set_type = set_type_impl::get_instance(utf8_type, true);
+    return execute_cql(req, LOCAL, sstring(LOCAL), make_set_value(set_type, prepare_tokens(tokens))).discard_result().then([] {
        return force_blocking_flush(LOCAL);
    });
 }

 future<> force_blocking_flush(sstring cfname) {
-    if (!qctx) {
-        return make_ready_future<>();
-    }
-
+    assert(qctx);
    return qctx->_db.invoke_on_all([cfname = std::move(cfname)](database& db) {
        // if (!Boolean.getBoolean("cassandra.unsafesystem"))
        column_family& cf = db.find_column_family(NAME, cfname);
@@ -862,7 +863,7 @@ future<std::unordered_set<dht::token>> get_saved_tokens() {
        auto blob = msg->one().get_blob("tokens");
        auto cdef = local()->get_column_definition("tokens");
        auto deserialized = cdef->type->deserialize(blob);
-        auto tokens = boost::any_cast<set_type_impl::native_type>(deserialized);
+        auto tokens = value_cast<set_type_impl::native_type>(deserialized);

        return make_ready_future<std::unordered_set<dht::token>>(decode_tokens(tokens));
    });
@@ -876,6 +877,10 @@ bool bootstrap_in_progress() {
    return get_bootstrap_state() == bootstrap_state::IN_PROGRESS;
 }

+bool was_decommissioned() { // True when get_bootstrap_state() reports DECOMMISSIONED.
+    return get_bootstrap_state() == bootstrap_state::DECOMMISSIONED;
+}
+
 bootstrap_state get_bootstrap_state() {
    return _local_cache.local()._state;
 }
@@ -884,7 +889,8 @@ future<> set_bootstrap_state(bootstrap_state state) {
    static std::unordered_map<bootstrap_state, sstring, enum_hash<bootstrap_state>> state_to_name({
        { bootstrap_state::NEEDS_BOOTSTRAP, "NEEDS_BOOTSTRAP" },
        { bootstrap_state::COMPLETED, "COMPLETED" },
-        { bootstrap_state::IN_PROGRESS, "IN_PROGRESS" }
+        { bootstrap_state::IN_PROGRESS, "IN_PROGRESS" },
+        { bootstrap_state::DECOMMISSIONED, "DECOMMISSIONED" }
    });

    sstring state_name = state_to_name.at(state);
@@ -1004,5 +1010,55 @@ query(distributed<service::storage_proxy>& proxy, const sstring& cf_name, const
    });
 }

+static map_type_impl::native_type prepare_rows_merged(std::unordered_map<int32_t, int64_t>& rows_merged) {
+    map_type_impl::native_type tmp;
+    for (auto& r: rows_merged) {
+        int32_t first = r.first;
+        int64_t second = r.second;
+        auto map_element = std::make_pair<data_value, data_value>(data_value(first), data_value(second));
+        tmp.push_back(std::move(map_element));
+    }
+    return tmp;
+}
+
+future<> update_compaction_history(sstring ksname, sstring cfname, int64_t compacted_at, int64_t bytes_in, int64_t bytes_out,
+                                   std::unordered_map<int32_t, int64_t> rows_merged)
+{
+    // don't write anything when the history table itself is compacted, since that would in turn cause new compactions
+    if (ksname == "system" && cfname == COMPACTION_HISTORY) {
+        return make_ready_future<>();
+    }
+
+    auto map_type = map_type_impl::get_instance(int32_type, long_type, true);
+
+    sstring req = "INSERT INTO system.%s (id, keyspace_name, columnfamily_name, compacted_at, bytes_in, bytes_out, rows_merged) VALUES (?, ?, ?, ?, ?, ?, ?)";
+
+    return execute_cql(req, COMPACTION_HISTORY, utils::UUID_gen::get_time_UUID(), ksname, cfname, compacted_at, bytes_in, bytes_out,
+                       make_map_value(map_type, prepare_rows_merged(rows_merged))).discard_result();
+}
+
+future<std::vector<compaction_history_entry>> get_compaction_history()
+{
+    sstring req = "SELECT * from system.%s";
+    return execute_cql(req, COMPACTION_HISTORY).then([] (::shared_ptr<cql3::untyped_result_set> msg) {
+        std::vector<compaction_history_entry> history;
+
+        for (auto& row : *msg) {
+            compaction_history_entry entry;
+            entry.id = row.get_as<utils::UUID>("id");
+            entry.ks = row.get_as<sstring>("keyspace_name");
+            entry.cf = row.get_as<sstring>("columnfamily_name");
+            entry.compacted_at = row.get_as<int64_t>("compacted_at");
+            entry.bytes_in = row.get_as<int64_t>("bytes_in");
+            entry.bytes_out = row.get_as<int64_t>("bytes_out");
+            if (row.has("rows_merged")) {
+                entry.rows_merged = row.get_map<int32_t, int64_t>("rows_merged");
+            }
+            history.push_back(std::move(entry));
+        }
+        return std::move(history);
+    });
+}
+
 } // namespace system_keyspace
 } // namespace db
--- a/db/system_keyspace.hh
+++ b/db/system_keyspace.hh
@@ -153,7 +153,8 @@ load_dc_rack_info();
 enum class bootstrap_state {
    NEEDS_BOOTSTRAP,
    COMPLETED,
-    IN_PROGRESS
+    IN_PROGRESS,
+    DECOMMISSIONED
 };

 #if 0
@@ -258,26 +259,28 @@ enum class bootstrap_state {
        compactionLog.truncateBlocking();
    }

-    public static void updateCompactionHistory(String ksname,
-                                               String cfname,
-                                               long compactedAt,
-                                               long bytesIn,
-                                               long bytesOut,
-                                               Map<Integer, Long> rowsMerged)
-    {
-        // don't write anything when the history table itself is compacted, since that would in turn cause new compactions
-        if (ksname.equals("system") && cfname.equals(COMPACTION_HISTORY))
-            return;
-        String req = "INSERT INTO system.%s (id, keyspace_name, columnfamily_name, compacted_at, bytes_in, bytes_out, rows_merged) VALUES (?, ?, ?, ?, ?, ?, ?)";
-        executeInternal(String.format(req, COMPACTION_HISTORY), UUIDGen.getTimeUUID(), ksname, cfname, ByteBufferUtil.bytes(compactedAt), bytesIn, bytesOut, rowsMerged);
-    }
-
    public static TabularData getCompactionHistory() throws OpenDataException
    {
        UntypedResultSet queryResultSet = executeInternal(String.format("SELECT * from system.%s", COMPACTION_HISTORY));
        return CompactionHistoryTabularData.from(queryResultSet);
    }
 #endif
+    struct compaction_history_entry {
+        utils::UUID id;
+        sstring ks;
+        sstring cf;
+        int64_t compacted_at = 0;
+        int64_t bytes_in = 0;
+        int64_t bytes_out = 0;
+        // Key: number of rows merged
+        // Value: counter
+        std::unordered_map<int32_t, int64_t> rows_merged;
+    };
+
+    future<> update_compaction_history(sstring ksname, sstring cfname, int64_t compacted_at, int64_t bytes_in, int64_t bytes_out,
+                                       std::unordered_map<int32_t, int64_t> rows_merged);
+    future<std::vector<compaction_history_entry>> get_compaction_history();
+
    typedef std::vector<db::replay_position> replay_positions;

    future<> save_truncation_record(const column_family&, db_clock::time_point truncated_at, db::replay_position);
@@ -519,6 +522,7 @@ enum class bootstrap_state {
 bool bootstrap_complete();
 bool bootstrap_in_progress();
 bootstrap_state get_bootstrap_state();
+bool was_decommissioned();
 future<> set_bootstrap_state(bootstrap_state state);

 #if 0
--- a/debian/control
+++ b/debian/control
@@ -1,13 +0,0 @@
-Source: scylla-server
-Maintainer: Takuya ASADA <syuu@scylladb.com>
-Homepage: http://scylladb.com
-Section: database
-Priority: optional
-Standards-Version: 3.9.2
-Build-Depends: debhelper (>= 9), libyaml-cpp-dev, liblz4-dev, libsnappy-dev, libcrypto++-dev, libjsoncpp-dev, libaio-dev, libthrift-dev, thrift-compiler, antlr3-tool, antlr3-c++-dev, ragel, g++-4.9, ninja-build, git, libboost-program-options1.55-dev, libboost-filesystem1.55-dev, libboost-system1.55-dev, libboost-thread1.55-dev, libboost-test1.55-dev
-
-Package: scylla-server
-Architecture: amd64
-Depends: ${shlibs:Depends}, ${misc:Depends}, hugepages
-Description: Scylla database server binaries 
- Scylla is a highly scalable, eventually consistent, distributed, partitioned row DB.
--- a/debian/copyright
+++ b/debian/copyright
@@ -1,16 +0,0 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: Scylla DB
-Upstream-Contact: http://www.scylladb.com/
-Source: https://github.com/scylladb/scylla
-
-Files: *
-Copyright: Copyright (C) 2015 ScyllaDB
-License: AGPL-3.0
-
-Files: seastar/*
-Copyright: Copyright (C) 2015 ScyllaDB
-License: Apache
-
-Files: seastar/dpdk/*
-Copyright: Copyright(c) 2015 Intel Corporation. All rights reserved.
-License: BSD-3-clause
--- a/debian/limits.d/scylla.conf
+++ b/debian/limits.d/scylla.conf
@@ -1,4 +0,0 @@
-scylla  -  memlock  unlimited
-scylla  -  nofile   100000
-scylla  -  as       unlimited
-scylla  -  nproc    8096
--- a/debian/scylla-server.postinst
+++ b/debian/scylla-server.postinst
@@ -1,24 +0,0 @@
-#!/bin/sh
-
-set -e
-
-if [ "$1" = configure ]; then
-    adduser --system \
-            --quiet \
-            --home /var/lib/scylla \
-            --no-create-home \
-            --disabled-password \
-            --group scylla
-    chown -R scylla:scylla /var/lib/scylla
-fi
-
-# Automatically added by dh_installinit
-if [ -x "/etc/init.d/scylla-server" ]; then
-	if [ ! -e "/etc/init/scylla-server.conf" ]; then
-		update-rc.d scylla-server defaults >/dev/null
-	fi
-fi
-# End automatically added section
-# Automatically added by dh_installinit
-update-rc.d -f scylla-server remove >/dev/null || exit $?
-# End automatically added section
--- a/debian/scylla-server.preinst
+++ b/debian/scylla-server.preinst
@@ -1,9 +0,0 @@
-# Automatically added by dh_installinit
-if [ "$1" = install ] || [ "$1" = upgrade ]; then
-	if [ -e "/etc/init.d/scylla-server" ] && [ -L "/etc/init.d/scylla-server" ] \
-	   && [ $(readlink -f "/etc/init.d/scylla-server") = /lib/init/upstart-job ]
-	then
-		rm -f "/etc/init.d/scylla-server"
-	fi
-fi
-# End automatically added section
--- a/dht/boot_strapper.cc
+++ b/dht/boot_strapper.cc
@@ -71,33 +71,33 @@ future<> boot_strapper::bootstrap() {
 }

 std::unordered_set<token> boot_strapper::get_bootstrap_tokens(token_metadata metadata, database& db) {
-#if 0
-    Collection<String> initialTokens = DatabaseDescriptor.getInitialTokens();
+    auto initial_tokens = db.get_initial_tokens();
    // if user specified tokens, use those
-    if (initialTokens.size() > 0)
-    {
-        logger.debug("tokens manually specified as {}",  initialTokens);
-        List<Token> tokens = new ArrayList<Token>(initialTokens.size());
-        for (String tokenString : initialTokens)
-        {
-            Token token = StorageService.getPartitioner().getTokenFactory().fromString(tokenString);
-            if (metadata.getEndpoint(token) != null)
-                throw new ConfigurationException("Bootstrapping to existing token " + tokenString + " is not allowed (decommission/removenode the old node first).");
-            tokens.add(token);
+    if (initial_tokens.size() > 0) {
+        logger.debug("tokens manually specified as {}", initial_tokens);
+        std::unordered_set<token> tokens;
+        for (auto& token_string : initial_tokens) {
+            auto token = dht::global_partitioner().from_sstring(token_string);
+            if (metadata.get_endpoint(token)) {
+                throw std::runtime_error(sprint("Bootstrapping to existing token %s is not allowed (decommission/removenode the old node first).", token_string));
+            }
+            tokens.insert(token);
        }
+        logger.debug("Get manually specified bootstrap_tokens={}", tokens);
        return tokens;
    }
-#endif
+
    size_t num_tokens = db.get_config().num_tokens();
    if (num_tokens < 1) {
        throw std::runtime_error("num_tokens must be >= 1");
    }

-    // if (numTokens == 1)
-    //     logger.warn("Picking random token for a single vnode.  You should probably add more vnodes; failing that, you should probably specify the token manually");
+    if (num_tokens == 1) {
+        logger.warn("Picking random token for a single vnode.  You should probably add more vnodes; failing that, you should probably specify the token manually");
+    }

    auto tokens = get_random_tokens(metadata, num_tokens);
-    logger.debug("Get bootstrap_tokens={}", tokens);
+    logger.debug("Get random bootstrap_tokens={}", tokens);
    return tokens;
 }

--- a/dht/byte_ordered_partitioner.cc
+++ b/dht/byte_ordered_partitioner.cc
@@ -34,12 +34,12 @@ token byte_ordered_partitioner::get_random_token()

 std::map<token, float> byte_ordered_partitioner::describe_ownership(const std::vector<token>& sorted_tokens)
 {
-    throw std::runtime_error("not implemented");
+    throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
 }

 token byte_ordered_partitioner::midpoint(const token& t1, const token& t2) const
 {
-    throw std::runtime_error("not implemented");
+    throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
 }

 unsigned
--- a/dht/i_partitioner.hh
+++ b/dht/i_partitioner.hh
@@ -386,12 +386,22 @@ public:
    friend std::ostream& operator<<(std::ostream&, const ring_position&);
 };

+// Trichotomic comparator for ring_position
 struct ring_position_comparator {
    const schema& s;
    ring_position_comparator(const schema& s_) : s(s_) {}
    int operator()(const ring_position& lh, const ring_position& rh) const;
 };

+// "less" comparator for ring_position
+struct ring_position_less_comparator {
+    const schema& s;
+    ring_position_less_comparator(const schema& s_) : s(s_) {}
+    bool operator()(const ring_position& lh, const ring_position& rh) const {
+        return lh.less_compare(s, rh);
+    }
+};
+
 struct token_comparator {
    // Return values are those of a trichotomic comparison.
    int operator()(const token& t1, const token& t2) const;
--- a/dht/murmur3_partitioner.cc
+++ b/dht/murmur3_partitioner.cc
@@ -75,6 +75,9 @@ token murmur3_partitioner::get_random_token() {
 }

 inline int64_t long_token(const token& t) {
+    if (t.is_minimum()) {
+        return std::numeric_limits<int64_t>::min(); // minimum token maps to the smallest value of the return type
+    }

    if (t._data.size() != sizeof(int64_t)) {
        throw runtime_exception(sprint("Invalid token. Should have size %ld, has size %ld\n", sizeof(int64_t), t._data.size()));
@@ -85,18 +88,8 @@ inline int64_t long_token(const token& t) {
    return net::ntoh(*lp);
 }

-// XXX: Technically, this should be inside long token. However, long_token is
-// used quite a lot in hot paths, so it is better to keep the branches of, if
-// we can. Most our comparators will check for _kind separately,
-// so this should be fine.
 sstring murmur3_partitioner::to_sstring(const token& t) const {
-    int64_t lt;
-    if (t._kind == dht::token::kind::before_all_keys) {
-        lt = std::numeric_limits<long>::min();
-    } else {
-        lt = long_token(t);
-    }
-    return ::to_sstring(lt);
+    return ::to_sstring(long_token(t));
 }

 dht::token murmur3_partitioner::from_sstring(const sstring& t) const {
@@ -119,17 +112,35 @@ int murmur3_partitioner::tri_compare(const token& t1, const token& t2) {
    }
 }

+// Assuming that x>=y, return the positive difference x-y.
+// The return type is an unsigned type, as the difference may overflow
+// a signed type (e.g., consider very positive x and very negative y).
+template <typename T>
+static std::make_unsigned_t<T> positive_subtract(T x, T y) {
+        return std::make_unsigned_t<T>(x) - std::make_unsigned_t<T>(y);
+}
+
 token murmur3_partitioner::midpoint(const token& t1, const token& t2) const {
    auto l1 = long_token(t1);
    auto l2 = long_token(t2);
-    // long_token is defined as signed, but the arithmetic works out the same
-    // without invoking undefined behavior with a signed type.
-    auto delta = (uint64_t(l2) - uint64_t(l1)) / 2;
-    if (l1 > l2) {
-        // wraparound
-        delta += 0x8000'0000'0000'0000;
+    int64_t mid;
+    if (l1 <= l2) {
+        // To find the midpoint, we cannot use the trivial formula (l1+l2)/2
+        // because the addition can overflow the integer. To avoid this
+        // overflow, we first notice that the above formula is equivalent to
+        // l1 + (l2-l1)/2. Now, "l2-l1" can still overflow a signed integer
+        // (e.g., think of a very positive l2 and very negative l1), but
+        // because l1 <= l2 in this branch, we note that l2-l1 is positive
+        // and fits an *unsigned* int's range. So,
+        mid = l1 + positive_subtract(l2, l1)/2;
+    } else {
+        // When l2 < l1, we need to switch l1 and l2 in the above
+        // formula, because now l1 - l2 is positive.
+        // Additionally, we consider this case a "wrap around", so we need
+        // to behave as if l2 + 2^64 was meant instead of l2, i.e., add 2^63
+        // to the average.
+        mid = l2 + positive_subtract(l1, l2)/2 + 0x8000'0000'0000'0000;
    }
-    auto mid = uint64_t(l1) + delta;
    return get_token(mid);
 }

--- a/dht/range_streamer.cc
+++ b/dht/range_streamer.cc
@@ -41,9 +41,11 @@
 #include "locator/snitch_base.hh"
 #include "database.hh"
 #include "gms/gossiper.hh"
+#include "gms/failure_detector.hh"
 #include "log.hh"
 #include "streaming/stream_plan.hh"
 #include "streaming/stream_state.hh"
+#include "service/storage_service.hh"

 namespace dht {

@@ -55,14 +57,7 @@ static std::unordered_map<range<token>, std::unordered_set<inet_address>>
 unordered_multimap_to_unordered_map(const std::unordered_multimap<range<token>, inet_address>& multimap) {
    std::unordered_map<range<token>, std::unordered_set<inet_address>> ret;
    for (auto x : multimap) {
-        auto& range_token = x.first;
-        auto& ep = x.second;
-        auto it = ret.find(range_token);
-        if (it != ret.end()) {
-            it->second.emplace(ep);
-        } else {
-            ret.emplace(range_token, std::unordered_set<inet_address>{ep});
-        }
+        ret[x.first].emplace(x.second);
    }
    return ret;
 }
@@ -115,7 +110,6 @@ range_streamer::get_all_ranges_with_sources_for(const sstring& keyspace_name, st
    auto& ks = _db.local().find_keyspace(keyspace_name);
    auto& strat = ks.get_replication_strategy();

-    // std::unordered_multimap<range<token>, inet_address>
    auto tm = _metadata.clone_only_token_map();
    auto range_addresses = unordered_multimap_to_unordered_map(strat.get_range_addresses(tm));

@@ -166,23 +160,24 @@ range_streamer::get_all_ranges_with_strict_sources_for(const sstring& keyspace_n
        for (auto& x : range_addresses) {
            const range<token>& src_range = x.first;
            if (src_range.contains(desired_range, dht::tri_compare)) {
-                auto old_endpoints = x.second;
+                std::vector<inet_address> old_endpoints(x.second.begin(), x.second.end());
                auto it = pending_range_addresses.find(desired_range);
-                assert (it != pending_range_addresses.end());
-                auto new_endpoints = it->second;
+                if (it == pending_range_addresses.end()) {
+                    throw std::runtime_error(sprint("Can not find desired_range = %s in pending_range_addresses", desired_range)); // sprint() takes printf-style placeholders, not "{}"
+                }
+                std::unordered_set<inet_address> new_endpoints = it->second;

                //Due to CASSANDRA-5953 we can have a higher RF then we have endpoints.
                //So we need to be careful to only be strict when endpoints == RF
                if (old_endpoints.size() == strat.get_replication_factor()) {
-                    std::unordered_set<inet_address> diff;
-                    std::set_difference(old_endpoints.begin(), old_endpoints.end(),
-                            new_endpoints.begin(), new_endpoints.end(), std::inserter(diff, diff.begin()));
-                    old_endpoints = std::move(diff);
+                    auto it = std::remove_if(old_endpoints.begin(), old_endpoints.end(),
+                        [&new_endpoints] (inet_address ep) { return new_endpoints.count(ep); });
+                    old_endpoints.erase(it, old_endpoints.end());
                    if (old_endpoints.size() != 1) {
-                        throw std::runtime_error(sprint("Expected 1 endpoint but found ", old_endpoints.size()));
+                        throw std::runtime_error(sprint("Expected 1 endpoint but found %d", old_endpoints.size()));
                    }
                }
-                range_sources.emplace(desired_range, *(old_endpoints.begin()));
+                range_sources.emplace(desired_range, old_endpoints.front());
            }
        }

@@ -210,9 +205,7 @@ range_streamer::get_all_ranges_with_strict_sources_for(const sstring& keyspace_n
 bool range_streamer::use_strict_sources_for_ranges(const sstring& keyspace_name) {
    auto& ks = _db.local().find_keyspace(keyspace_name);
    auto& strat = ks.get_replication_strategy();
-    // FIXME: DatabaseDescriptor.isReplacing()
-    auto is_replacing = false;
-    return !is_replacing
+    return !_db.local().is_replacing()
           && use_strict_consistency()
           && !_tokens.empty()
           && _metadata.get_all_endpoints().size() != strat.get_replication_factor();
@@ -229,25 +222,17 @@ void range_streamer::add_ranges(const sstring& keyspace_name, std::vector<range<
        }
    }

-    // TODO: share code with unordered_multimap_to_unordered_map
-    std::unordered_map<inet_address, std::vector<range<token>>> tmp;
+    std::unordered_map<inet_address, std::vector<range<token>>> range_fetch_map;
    for (auto& x : get_range_fetch_map(ranges_for_keyspace, _source_filters, keyspace_name)) {
-        auto& addr = x.first;
-        auto& range_ = x.second;
-        auto it = tmp.find(addr);
-        if (it != tmp.end()) {
-            it->second.push_back(range_);
-        } else {
-            tmp.emplace(addr, std::vector<range<token>>{range_});
-        }
+        range_fetch_map[x.first].emplace_back(x.second);
    }

    if (logger.is_enabled(logging::log_level::debug)) {
-        for (auto& x : tmp) {
+        for (auto& x : range_fetch_map) {
            logger.debug("{} : range {} from source {} for keyspace {}", _description, x.second, x.first, keyspace_name);
        }
    }
-    _to_fetch.emplace(keyspace_name, std::move(tmp));
+    _to_fetch.emplace(keyspace_name, std::move(range_fetch_map));
 }

 future<streaming::stream_state> range_streamer::fetch_async() {
@@ -268,4 +253,17 @@ future<streaming::stream_state> range_streamer::fetch_async() {
    return _stream_plan.execute();
 }

+std::unordered_multimap<inet_address, range<token>>
+range_streamer::get_work_map(const std::unordered_multimap<range<token>, inet_address>& ranges_with_source_target,
+             const sstring& keyspace) {
+    auto filter = std::make_unique<dht::range_streamer::failure_detector_source_filter>(gms::get_local_failure_detector());
+    std::unordered_set<std::unique_ptr<i_source_filter>> source_filters;
+    source_filters.emplace(std::move(filter));
+    return get_range_fetch_map(ranges_with_source_target, source_filters, keyspace);
+}
+
+bool range_streamer::use_strict_consistency() {
+    return service::get_local_storage_service().db().local().get_config().consistent_rangemovement();
+}
+
 } // dht
--- a/dht/range_streamer.hh
+++ b/dht/range_streamer.hh
@@ -62,10 +62,7 @@ public:
    using stream_plan = streaming::stream_plan;
    using stream_state = streaming::stream_state;
    using i_failure_detector = gms::i_failure_detector;
-    static bool use_strict_consistency() {
-        //FIXME: Boolean.parseBoolean(System.getProperty("cassandra.consistent.rangemovement","true"));
-        return true;
-    }
+    static bool use_strict_consistency();
 public:
    /**
     * A filter applied to sources to stream from when constructing a fetch map.
@@ -73,6 +70,7 @@ public:
    class i_source_filter {
    public:
        virtual bool should_include(inet_address endpoint) = 0;
+        virtual ~i_source_filter() {}
    };

    /**
@@ -148,11 +146,11 @@ private:
                        const std::unordered_set<std::unique_ptr<i_source_filter>>& source_filters,
                        const sstring& keyspace);

+public:
+    static std::unordered_multimap<inet_address, range<token>>
+    get_work_map(const std::unordered_multimap<range<token>, inet_address>& ranges_with_source_target,
+                 const sstring& keyspace);
 #if 0
-    public static Multimap<InetAddress, Range<Token>> getWorkMap(Multimap<Range<Token>, InetAddress> rangesWithSourceTarget, String keyspace)
-    {
-        return getRangeFetchMap(rangesWithSourceTarget, Collections.<ISourceFilter>singleton(new FailureDetectorSourceFilter(FailureDetector.instance)), keyspace);
-    }

    // For testing purposes
    Multimap<String, Map.Entry<InetAddress, Collection<Range<Token>>>> toFetch()
--- a/dht/token_range.hh
+++ b/dht/token_range.hh
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2015 ScyllaDB.
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+#include <vector>
+
+namespace dht {
+struct endpoint_details {
+    sstring _host;
+    sstring _datacenter;
+    sstring _rack;
+};
+
+struct token_range {
+    sstring _start_token;
+    sstring _end_token;
+    std::vector<sstring> _endpoints;
+    std::vector<sstring> _rpc_endpoints;
+    std::vector<endpoint_details> _endpoint_details;
+};
+}
--- a/dist/ami/build_ami.sh
+++ b/dist/ami/build_ami.sh
@@ -1,9 +1,17 @@
 #!/bin/sh -e

-if [ ! -e dist/ami/build_ami.sh ] || [ ! -e ../scylla-jmx/dist/redhat/build_rpm.sh ] || [ ! -e ../cassandra/dist/redhat/build_rpm.sh ]; then
+if [ ! -e dist/ami/build_ami.sh ]; then
    echo "run build_ami.sh in top of scylla dir"
-    echo "please make sure scylla-jmx is checked out under the same directory as scylla"
-    echo "please make sure cassandra with scylla tools branch checked out under the same directory as scylla"
+    exit 1
+fi
+
+TARGET_JSON=scylla.json
+if [ "$1" != "" ]; then
+    TARGET_JSON=$1
+fi
+
+if [ ! -f "dist/ami/$TARGET_JSON" ]; then  # quoted: the name comes from $1
+    echo "dist/ami/$TARGET_JSON not found"
    exit 1
 fi

@@ -14,39 +22,6 @@ if [ ! -f variables.json ]; then
    exit 1
 fi

-if [ ! -f files/scylla-server.rpm ] || [ ! -f files/scylla-server-debuginfo.rpm ]; then
-    cd ../../
-    dist/redhat/build_rpm.sh
-    SCYLLA_VERSION=$(cat build/SCYLLA-VERSION-FILE)
-    SCYLLA_RELEASE=$(cat build/SCYLLA-RELEASE-FILE)
-    RPM=`ls build/rpms/scylla-server-$SCYLLA_VERSION-$SCYLLA_RELEASE*.x86_64.rpm|grep -v debuginfo`
-    cp $RPM dist/ami/files/scylla-server.rpm
-    cp build/rpms/scylla-server-debuginfo-$SCYLLA_VERSION-$SCYLLA_RELEASE*.x86_64.rpm dist/ami/files/scylla-server-debuginfo.rpm
-    cd -
-fi
-
-if [ ! -f files/scylla-jmx.rpm ]; then
-    CWD=`pwd`
-    cd ../../../scylla-jmx
-    dist/redhat/build_rpm.sh
-    SCYLLA_VERSION=$(cat build/SCYLLA-VERSION-FILE)
-    SCYLLA_RELEASE=$(cat build/SCYLLA-RELEASE-FILE)
-    RPM=`ls build/rpms/scylla-jmx-$SCYLLA_VERSION-$SCYLLA_RELEASE*.noarch.rpm`
-    cp $RPM $CWD/files/scylla-jmx.rpm
-    cd -
-fi
-
-if [ ! -f files/scylla-tools.rpm ]; then
-    CWD=`pwd`
-    cd ../../../cassandra
-    dist/redhat/build_rpm.sh
-    SCYLLA_VERSION=$(cat build/SCYLLA-VERSION-FILE)
-    SCYLLA_RELEASE=$(cat build/SCYLLA-RELEASE-FILE)
-    RPM=`ls build/rpms/scylla-tools-$SCYLLA_VERSION-$SCYLLA_RELEASE*.noarch.rpm`
-    cp $RPM $CWD/files/scylla-tools.rpm
-    cd -
-fi
-
 if [ ! -d packer ]; then
    wget https://dl.bintray.com/mitchellh/packer/packer_0.8.6_linux_amd64.zip
    mkdir packer
@@ -55,4 +30,4 @@ if [ ! -d packer ]; then
    cd -
 fi

-packer/packer build -var-file=variables.json scylla.json
+packer/packer build -var-file=variables.json "$TARGET_JSON"
--- a/dist/ami/build_ami_local.sh
+++ b/dist/ami/build_ami_local.sh
@@ -0,0 +1,42 @@
+#!/bin/sh -e
+#
+# Build the scylla-server, scylla-jmx and scylla-tools RPMs locally (skipping
+# any that already exist under dist/ami/), then hand off to
+# dist/ami/build_ami.sh with the scylla_local.json template.
+# Must be run from the top of the scylla source tree.
+
+# "-e" on the shebang line is lost when run as "sh build_ami_local.sh".
+set -e
+
+if [ ! -e dist/ami/build_ami_local.sh ]; then
+    echo "run build_ami_local.sh in top of scylla dir"
+    exit 1
+fi
+
+sudo yum -y install git
+if [ ! -f dist/ami/scylla-server.x86_64.rpm ]; then
+    dist/redhat/build_rpm.sh
+    cp build/rpms/scylla-server-"$(cat build/SCYLLA-VERSION-FILE)"-"$(cat build/SCYLLA-RELEASE-FILE)".*.x86_64.rpm dist/ami/scylla-server.x86_64.rpm
+fi
+if [ ! -f dist/ami/scylla-jmx.noarch.rpm ]; then
+    mkdir -p build
+    cd build
+    # Reuse a checkout left behind by an earlier run instead of failing the clone.
+    [ -d scylla-jmx ] || git clone --depth 1 https://github.com/scylladb/scylla-jmx.git
+    cd scylla-jmx
+    sh -x -e dist/redhat/build_rpm.sh
+    cd ../..
+    cp build/scylla-jmx/build/rpms/scylla-jmx-"$(cat build/scylla-jmx/build/SCYLLA-VERSION-FILE)"-"$(cat build/scylla-jmx/build/SCYLLA-RELEASE-FILE)".*.noarch.rpm dist/ami/scylla-jmx.noarch.rpm
+fi
+if [ ! -f dist/ami/scylla-tools.noarch.rpm ]; then
+    mkdir -p build
+    cd build
+    # Reuse a checkout left behind by an earlier run instead of failing the clone.
+    [ -d scylla-tools-java ] || git clone --depth 1 https://github.com/scylladb/scylla-tools-java.git
+    cd scylla-tools-java
+    sh -x -e dist/redhat/build_rpm.sh
+    cd ../..
+    cp build/scylla-tools-java/build/rpms/scylla-tools-"$(cat build/scylla-tools-java/build/SCYLLA-VERSION-FILE)"-"$(cat build/scylla-tools-java/build/SCYLLA-RELEASE-FILE)".*.noarch.rpm dist/ami/scylla-tools.noarch.rpm
+fi
+
+exec dist/ami/build_ami.sh scylla_local.json
--- a/dist/ami/files/.bash_profile
+++ b/dist/ami/files/.bash_profile
@@ -0,0 +1,45 @@
+# .bash_profile
+
+# Get the aliases and functions
+if [ -f ~/.bashrc ]; then
+	. ~/.bashrc
+fi
+
+# User specific environment and startup programs
+
+PATH=$PATH:$HOME/.local/bin:$HOME/bin
+
+export PATH
+
+echo
+echo '   _____            _ _       _____  ____  '
+echo '  / ____|          | | |     |  __ \|  _ \ '
+echo ' | (___   ___ _   _| | | __ _| |  | | |_) |'
+echo '  \___ \ / __| | | | | |/ _` | |  | |  _ < '
+echo '  ____) | (__| |_| | | | (_| | |__| | |_) |'
+echo ' |_____/ \___|\__, |_|_|\__,_|_____/|____/ '
+echo '               __/ |                       '
+echo '              |___/                        '
+echo ''
+echo ''
+echo 'Nodetool:'
+echo '	nodetool --help'
+echo 'CQL Shell:'
+echo '	cqlsh'
+echo 'More documentation available at: '
+echo '	http://www.scylladb.com/doc/'
+echo
+
+if [ "`systemctl is-active scylla-server`" = "active" ]; then
+	tput setaf 4
+	tput bold
+	echo "    ScyllaDB is active."
+	tput sgr0
+else
+	tput setaf 1
+	tput bold
+	echo "    ScyllaDB is not started!"
+	tput sgr0
+	echo "Please wait for startup. To see status of ScyllaDB, run "
+	echo " 'systemctl status scylla-server'"
+fi
--- a/dist/ami/files/coredump.conf
+++ b/dist/ami/files/coredump.conf
@@ -1,5 +0,0 @@
-[Coredump]
-Storage=external
-Compress=yes
-ProcessSizeMax=16G
-ExternalSizeMax=16G
--- a/dist/ami/files/scylla-ami
+++ b/dist/ami/files/scylla-ami
--- a/Show More
+++ b/Show More