USACO 5.1 Starry Night

The basic idea is to find each cluster, and then compare each cluster pair by pair. There are several sub problems to solve in this case:

  • How to represent a cluster?
    A cluster can be represented by the rectangle that covers the cluster together with the source grid (the sky). Each cluster has two key properties: the number of stars, and the covering rectangle, which can be represented by left/bottom and right/upper points.

  • How to find a cluster?
    A cluster can be found by using the flood fill algorithm against the sky.

  • How to compare two clusters?
    Two clusters are obviously different if they contain different numbers of stars. For clusters that contain the same number of stars, a set of transforms needs to be performed on one cluster, and each transformed cluster is then compared with the other cluster:

    • Rotate clockwise 90 degrees.
    • Rotate clockwise 180 degrees.
    • Rotate clockwise 270 degrees.
    • Rotate clockwise 360 degrees (i.e. the original orientation).
    • Mirror the cluster then repeat.
#include <vector>
#include <queue>
#include <iostream>
#include <fstream>

using namespace std;

// Grid dimensions: R = number of rows, C = number of columns
// (the input file stores the width/columns first — see main()).
int R, C;
// One connected group of stars: its covering rectangle plus star count.
typedef struct Cluster {
int min_x, min_y, max_x, max_y; // bounding rectangle; -1 = not set yet
int num_of_stars;
Cluster() : min_x(-1), min_y(-1), max_x(-1), max_y(-1), num_of_stars(0) {}
} Cluster;
// A grid coordinate, used by the BFS queue in flood_fill.
typedef struct Point {
int x; int y;
Point(int x, int y) : x(x), y(y) {}
} Point;
vector<vector<char>> sky; // raw input grid of '0'/'1' characters
vector<vector<short>> mark; // cluster index per cell; -1 = unvisited
vector<Cluster> clusters(505); // clusters found so far, indexed by mark value
char p[550]; // output letter assigned to each cluster index
int idx; // number of clusters discovered; also the next index to assign
// Grow the bounding rectangle of the cluster currently being filled
// (global `idx`) so that it covers cell (x, y). A coordinate of -1 means
// the rectangle has not been initialized yet.
void calculate_bound(int x, int y) {
    auto &cur = clusters[idx];
    cur.min_x = (cur.min_x == -1) ? x : std::min(cur.min_x, x);
    cur.min_y = (cur.min_y == -1) ? y : std::min(cur.min_y, y);
    cur.max_x = (cur.max_x == -1) ? x : std::max(cur.max_x, x);
    cur.max_y = (cur.max_y == -1) ? y : std::max(cur.max_y, y);
}

int flood_fill(int x, int y, int index) {
int num_of_stars = 1;
mark[x][y] = index;
calculate_bound(x, y);

queue<Point> q;
q.push(Point(x, y));
while (!q.empty()) {
auto p = q.front(); q.pop();
for (int i = -1; i <= 1; ++i) {
for (int j = -1; j <= 1; ++j) {
int xx = p.x + i, yy = p.y + j;
if (xx < 0 || xx >= R || yy < 0 || yy >= C) continue;
if (sky[xx][yy] != '1' || mark[xx][yy] != -1) continue;
++num_of_stars;
mark[xx][yy] = index;
calculate_bound(xx, yy);
q.push(Point(xx, yy));
}
}
}
return num_of_stars;
}

// Rotate the lx-by-ly sub-grid stored in the top-left corner of `cluster`
// 90 degrees clockwise. Afterwards the rotated image occupies the
// ly-by-lx top-left corner; cells outside that region keep their previous
// contents. `cluster` must be at least max(lx, ly) in both dimensions.
void rotate(vector<vector<char>> &cluster, int lx, int ly) {
    int rows = (int)cluster.size();
    int cols = (int)cluster[0].size();
    vector<vector<char>> turned(rows, vector<char>(cols));

    // Source cell (r, c) lands on (c, lx - 1 - r) after a clockwise turn.
    for (int r = 0; r < lx; ++r) {
        for (int c = 0; c < ly; ++c) {
            turned[c][lx - 1 - r] = cluster[r][c];
        }
    }

    // Copy only the ly-by-lx rotated region back into place.
    for (int r = 0; r < ly; ++r) {
        for (int c = 0; c < lx; ++c) {
            cluster[r][c] = turned[r][c];
        }
    }
}

// Mirror the lx-by-ly top-left sub-grid of `cluster` horizontally by
// reversing each of its lx rows in place.
void mirror(vector<vector<char>> &cluster, int lx, int ly) {
    for (int row = 0; row < lx; ++row) {
        int left = 0, right = ly - 1;
        while (left < right) {
            swap(cluster[row][left], cluster[row][right]);
            ++left;
            --right;
        }
    }
}

// Check whether the lx-by-ly pattern in `cluster` covers every star of
// cluster `index`, anchored at that cluster's bounding-box corner.
// Callers guarantee equal star counts and equal bounding-box dimensions
// before calling, so this one-directional check decides shape equality.
// Fix: take `cluster` by const reference — the original copied the whole
// 110x110 grid by value on every orientation tested.
bool match(int index, const vector<vector<char>> &cluster, int lx, int ly) {
    for (int i = 0; i < lx; ++i) {
        for (int j = 0; j < ly; ++j) {
            int xx = clusters[index].min_x + i,
                yy = clusters[index].min_y + j;
            // A star of cluster `index` must map onto a star in the pattern.
            if (mark[xx][yy] == index && cluster[i][j] == '0') return false;
        }
    }
    return true;
}

bool is_two_cluster_same(int a, int b) {
if (clusters[a].num_of_stars != clusters[b].num_of_stars) {
return false;
}

int ax = clusters[a].max_x - clusters[a].min_x + 1,
ay = clusters[a].max_y - clusters[a].min_y + 1;
int lx = clusters[b].max_x - clusters[b].min_x + 1,
ly = clusters[b].max_y - clusters[b].min_y + 1;
vector<vector<char>> tmp(110, vector<char>(110));
for (int i = 0; i < lx; ++i) {
for (int j = 0; j < ly; ++j) {
int xx = i + clusters[b].min_x, yy = j + clusters[b].min_y;
tmp[i][j] = sky[xx][yy];
}
}
if (ax == lx && ay == ly && match(a, tmp, lx, ly)) return true;
for(int i = 0; i < 4; ++i) {
rotate(tmp, lx, ly);
swap(lx, ly);
if(ax == lx && ay == ly && match(a, tmp, lx, ly)) return true;
}
mirror(tmp, lx, ly);
for(int i = 0; i < 4; ++i) {
rotate(tmp, lx, ly);
swap(lx, ly);
if(ax == lx && ay == ly && match(a, tmp, lx, ly)) return true;
}
return false;
}

// Reads the sky from starry.in, groups stars into 8-connected clusters,
// assigns one lowercase letter per distinct cluster shape (modulo
// rotation/mirror), and writes the lettered grid to starry.out.
// Fix: removed leftover debug printing to stdout (cluster dump and a full
// echo of the output grid) that was interleaved with the real output.
int main() {
    ifstream fin("starry.in");
    ofstream fout("starry.out");

    // The input stores the width (columns) first, then the height (rows).
    fin >> C >> R;
    sky = vector<vector<char>>(R, vector<char>(C));
    mark = vector<vector<short>>(R, vector<short>(C, -1));
    for (int i = 0; i < R; ++i) {
        for (int j = 0; j < C; ++j) {
            fin >> sky[i][j];
        }
    }

    // Label every connected component of stars with a cluster index.
    idx = 0;
    for (int i = 0; i < R; ++i) {
        for (int j = 0; j < C; ++j) {
            if (sky[i][j] == '1' && mark[i][j] == -1) {
                clusters[idx].num_of_stars = flood_fill(i, j, idx);
                ++idx;
            }
        }
    }

    // Assign letters: reuse the letter of the first earlier cluster with
    // the same shape, otherwise consume the next unused letter.
    char c = 'a';
    vector<vector<char>> ans = sky;
    for (int i = 0; i < idx; ++i) {
        int index = -1;
        for (int j = 0; j < i; ++j) {
            if (is_two_cluster_same(i, j)) {
                index = j;
                break;
            }
        }
        p[i] = (index != -1) ? p[index] : c++;
        // Paint this cluster's cells in the output grid.
        for (int ii = clusters[i].min_x; ii <= clusters[i].max_x; ++ii) {
            for (int jj = clusters[i].min_y; jj <= clusters[i].max_y; ++jj) {
                if (mark[ii][jj] == i) {
                    ans[ii][jj] = p[i];
                }
            }
        }
    }

    for (int i = 0; i < R; ++i) {
        for (int j = 0; j < C; ++j) {
            fout << ans[i][j];
        }
        fout << '\n';
    }
}

USACO 5.1 Fencing The Cows

The answer is the perimeter of the convex hull of all the grazing points. Use Graham Scan to calculate the convex hull, because it’s efficient and easy to implement:

  • First, pick a point that has smallest x coordinate. Use y coordinate to break ties if necessary. Let’s call this point the base point.
  • Then iterate for the rest of the points, calculate the polar angle between the current point and the base point.
  • Now we get all points and their polar angles with respect to the base point, sort these points based on polar angle.
  • Walk through the sorted points and make sure a new point only appears in the counterclockwise direction (a left turn) relative to the current point. Add the new point to the result, and drop old points if necessary.
  • We should now have the set of convex hull points available. Calculating the perimeter is now trivial.

This implementation also deals with the degenerate case where the points are collinear.

#include <vector>
#include <iostream>
#include <fstream>
#include <limits>
#include <algorithm>
#include <cmath>
#include <stack>
#include <iomanip>

using namespace std;

int N; // number of input points
struct Point {
double x; double y; double pa /* Polar Angle with reference point. */;
};
vector<Point> points; // points[0] becomes the base point after main() swaps it

// Euclidean distance between two points.
// Fixes: parameters are now const references (the originals were mutable
// refs, which also rejected const arguments), and squaring uses plain
// multiplication instead of std::pow(x, 2), which is far slower for
// small integer exponents.
double dist(const Point &a, const Point &b) {
    double dx = a.x - b.x;
    double dy = a.y - b.y;
    return sqrt(dx * dx + dy * dy);
}

bool ccw(Point &a, Point &b, Point &c) {
double area = (b.x - a.x) * (c.y - a.y) - (c.x - a.x) * (b.y - a.y);
if (area > 0) {
return true;
} else {
return false;
}
}

// Graham scan over the global `points`, which main() has already sorted
// by polar angle around points[0] (the smallest-x point). Returns the
// perimeter of the convex hull rather than the hull itself.
double graham_scan() {
double ans = 0;
// Scan stack of hull candidates, seeded with the first three points.
// NOTE(review): assumes N >= 3 — with fewer points, points[2] below is
// out of range; callers must guarantee this.
stack<Point> stack;
stack.push(points[0]);
stack.push(points[1]);
stack.push(points[2]);

for (int i = 3; i < N; i++) {
Point top = stack.top();
stack.pop();
// Pop until appending points[i] makes a strict left turn. Collinear
// triples (ccw == false for zero area) are popped too, so interior
// collinear points do not survive on the hull.
// NOTE(review): if the leading points are collinear with the base,
// this loop can empty the stack and then call top() on it — appears
// to rely on the sorted input making that impossible; confirm.
while (!ccw(stack.top(), top, points[i])) {
top = stack.top();
stack.pop();
}
stack.push(top);
stack.push(points[i]);
}

// Save the last point to close the hull with p0.
// Unwind the stack summing edge lengths; `cur` ends at points[0]
// (the stack bottom), and the final edge closes the polygon back to
// the last hull point p1.
Point p1 = stack.top(); stack.pop();
Point cur = p1;
while (!stack.empty()) {
ans += dist(cur, stack.top());
cur = stack.top(); stack.pop();
}
ans += dist(cur, p1);
return ans;
}

// Reads the grazing points from fc.in, sorts them by polar angle around
// the smallest-x point, and writes the convex-hull perimeter to fc.out.
// Fixes: a point coinciding with the base made dist() return 0, so the
// polar-angle division produced NaN and the sort comparator violated
// strict weak ordering (undefined behavior) — such duplicates now sort
// first with a sentinel angle. Also removed the duplicate debug print of
// the answer to stdout.
int main() {
    ifstream fin("fc.in");
    ofstream fout("fc.out");
    fin >> N;
    points = vector<Point>(N);
    // Track the base point while reading: smallest x, ties broken by
    // smallest y.
    int idx = 0;
    for (int i = 0; i < N; ++i) {
        fin >> points[i].x >> points[i].y;
        if (points[i].x < points[idx].x) {
            idx = i;
        } else if (abs(points[i].x - points[idx].x) <
                       std::numeric_limits<double>::epsilon() &&
                   points[i].y < points[idx].y) {
            idx = i;
        }
    }

    std::swap(points[idx], points[0]);
    // Sort key: sine of the polar angle w.r.t. the base point. Because
    // the base has the smallest x, all angles lie in [-90, 90] degrees
    // where sine is monotonic, so this orders by angle without atan2.
    for (int i = 1; i < N; ++i) {
        double d = dist(points[i], points[0]);
        // d == 0 means a duplicate of the base point: use a sentinel
        // below sine's range [-1, 1] instead of dividing by zero.
        points[i].pa = (d > 0) ? (points[i].y - points[0].y) / d : -2.0;
    }
    sort(points.begin() + 1, points.end(), [] (const Point &a, const Point &b) {
        return a.pa < b.pa;
    });

    double ret = graham_scan();
    fout << std::fixed << std::setprecision(2) << ret << endl;
}

ZooKeeper Consistency Guarantees

There are many consistency models out there, from stronger ones to weaker ones. What consistency model best describes ZooKeeper?

Consistency Models

It is useful to think consistency models from two perspectives: server side (data centric) and client side (client centric), in particular in the context of ZooKeeper (or a general distributed storage system that is composed of clients and servers.).

  • Data Centric Consistency Models
    • Strict Consistency
      The strongest consistency model for distributed systems. All changes to system state are required to propagate to all replicas instantly, so any write should be immediately visible to all parts of the system (both server and client). More formally, any read on a data item should return a value corresponding to the result of the most recent write on that item. This system model is not possible to implement on top of current hardware for distributed systems, because it requires a global clock (for strict ordering), and zero latency and infinite bandwidth for communication between different parts of the system. The closest resemblance to a system that satisfies this model is a single machine, where the latency / bandwidth / clock constraints are almost satisfied (even on a single machine, achieving strict consistency is hard due to cache hierarchies and the latencies of memory / cache access).
      Because of the impossibilities of implementation, a less strict model called atomic register was proposed (from Leslie Lamport), and a particular version of this model is known as linearizable consistency.
      • Linearizable consistency
        Linearizable consistency model, comparing to strict consistency model, takes the latency into account, so it does not require writes to an item being visible instantly. The real constraint of this consistency model is that the operations can’t interleave, can’t be reordered, and the sequence of writes are visible in strict global ordering as these writes were issued. It is a recency guarantee such that if a change to the value is visible to one client, the change has to be visible to all clients. A client can’t read old value if some other clients have seen the new value. The change history is linearized such that all operations appear to have executed atomically in the order that is consistent with the global real time ordering of operations. One nice property of linearizable consistency is that it composes - which is an extremely useful property for reasoning about the behavior of concurrent operations in distributed systems.
    • Sequential Consistency
      Sequential consistency is a weaker consistency model compared to linearizable consistency. It requires that all operations appear to have executed atomically in some order, and the key constraint is that this order, as visible to all parts of the system, has to be consistent. However, this order does not have to be consistent with the ordering of the original operations. Because of this, the system is free to reorder the execution of operations; it just needs to present a globally consistent visible ordering of operations to all clients. That is the key difference from the linearizable consistency model. Also, sequential consistency does not compose the way linearizable consistency does.
    • Causal Consistency
      Causal consistency is an even weaker model than sequential consistency. It only requires that causally related writes be visible to all processes, while writes that are not causally related are free to execute in any order. This relaxes the constraint of sequential consistency (that all writes must converge to the same order), so it is a weaker model than sequential consistency. A sequentially consistent system is thus by definition causally consistent as well.
    • Eventual Consistency
      The weakest consistency model of all. Distributed systems that adopt this model make a lot of things harder, though it is a good trade-off to make in certain use cases. Eventual consistency is more of a liveness property (that values eventually converge), so it is usually trivially satisfiable in most systems.
  • Client Centric Consistency Models:
    • Monotonic reads
      Once read, subsequent reads on that data items return same or more recent values.
    • Monotonic writes
      A write must be propagated to all replicas before a successive write by the same process; writes from same process are processed in same order.
    • Read your writes
      read(x) always returns write(x) by that process.
    • Writes follow reads
      write(x) following read(x) will take place on same or more recent version of x.

With all these said, what consistency model ZooKeeper fit in?

  • Linearizable consistency: ZooKeeper does not guarantee linearizable consistency. Two ZooKeeper clients can see two different values of the same data item (the value of a znode) at the same time - e.g. one client can see an old value while the other client sees the new value. Even with the sync command, which stalls the write processors, it is still possible for a client to miss a write (e.g. after sync finishes executing but before the client gets the updated value, another client issues a new write that updates the value again). Also, due to potential network delays, it is hard to guarantee that operations on all clients happen in the same order globally.
  • Sequential consistency: this is what ZooKeeper guarantees. Sequential consistency does not care about exact ordering of operations and ZooKeeper can reorder operations, but all the operations ordering will converge to a single globally consistent ordering that is visible to all clients (eventually, or near instantly if client uses sync command.).
  • Causal consistency: because ZooKeeper satisfies sequential consistency, it also satisfies causal consistency. More specifically, since causal consistency is a stronger guarantee than read-your-writes and monotonic reads, those properties are also guaranteed.
  • Monotonic reads / Read your writes: as discussed, these are trivially satisfied with stronger guarantee. Note in practice there are a lot of corner cases to consider to satisfy these properties from a client point of view, for example what if a client disconnect and connect to a server node where it does not have latest values for the same znode client has seen before (that this server lags behind leader, which is normal because ZooKeeper only requires a quorum of servers in sync with leader). This and similar cases are handled by the guarantees of ZooKeeper session which provides some strong guarantees with regards to how client and server could interact (will describe this in a future post).
  • Monotonic writes: ZooKeeper does not satisfy this because a write is only required to be replicated to a quorum of servers.
  • As a side note, there is a long-pending bug in the sync operation - it is not a quorum operation, so the guarantee it provides only applies if the client is connected to the leader. This needs to be fixed soon.