Aggregation in MongoDB

Aggregation in MongoDB

ยท

7 min read

Hello, devs ๐Ÿ‘‹

In this blog, we will learn the basics of the MongoDB aggregation framework to filter, sort, group, and transform our MongoDB results. MongoDB helps us to do all these operations through aggregation pipelines which are a series of operations that process data documents sequentially.

For practice, we can use - Mongo playground

Input docs

[
  {
    "key": 1,
    username: "saurabh",
    age: 18,
    languages: [
      "c",
      "c++"
    ]
  },
  {
    "key": 2,
    username: "leonord",
    age: 22,
    languages: [
      "c",
      "c++",
      "java"
    ]
  },
  {
    "key": 3,
    username: "sheldon",
    age: 14,
    languages: [
      "c",
      "c++",
      "java",
      "python"
    ]
  },
  {
    "key": 4,
    username: "howard",
    age: 32,
    languages: [
      "c",
      "c++",
      "java",
      "python",
      "dart"
    ]
  },
  {
    "key": 5,
    username: "raj",
    age: 5,
    languages: [
      "c",
      "c++",
      "java",
      "python",
      "dart",
      "ts"
    ]
  }
]

1. $group aggregation = used for grouping and summarizing documents. We must specify an _id field with a valid expression.

Query

db.collection.aggregate([
  {
    $group: {
      _id: "table_stats",
      // Get count of all docs in the collection
      count: {
        $sum: 1
      },
      // Get age stats by grouping age field
      avgAge: {
        $avg: "$age"
      },
      maxAge: {
        $max: "$age"
      },
      minAge: {
        $min: "$age"
      },
      sumAge: {
        $sum: "$age"
      },
      // Get all usernames by grouping username field
      allUsernames: {
        $push: "$username"
      },
      // Get username of first doc
      firstUsername: {
        $first: "$username"
      },
      // Get username of last doc
      lastUsername: {
        $last: "$username"
      }
    }
  }
])

Result

[
  {
    "_id": "table_stats",
    "allUsernames": [
      "saurabh",
      "leonord",
      "sheldon",
      "howard",
      "raj"
    ],
    "avgAge": 18.2,
    "count": 5,
    "firstUsername": "saurabh",
    "lastUsername": "raj",
    "maxAge": 32,
    "minAge": 5,
    "sumAge": 91
  }
]

2. $match aggregation - This is used to reduce the number of docs in the result by filtering.

Query

// Match all docs where `age` is greater than 20 or equal to 20

db.collection.aggregate([
  {
    "$match": {
      age: {
        $gte: 20
      }
    }
  }
])

Result

[
  {
    "_id": ObjectId("5a934e000102030405000001"),
    "age": 22,
    "key": 2,
    "languages": [
      "c",
      "c++",
      "java"
    ],
    "username": "leonord"
  },
  {
    "_id": ObjectId("5a934e000102030405000003"),
    "age": 32,
    "key": 4,
    "languages": [
      "c",
      "c++",
      "java",
      "python",
      "dart"
    ],
    "username": "howard"
  }
]

Query

// Match all docs that have languages either `python` or `dart` or both

db.collection.aggregate([
  {
    "$match": {
      languages: {
        $in: [
          "python",
          "dart"
        ]
      }
    }
  }
])

Result

[
  {
    "_id": ObjectId("5a934e000102030405000002"),
    "age": 14,
    "key": 3,
    "languages": [
      "c",
      "c++",
      "java",
      "python"
    ],
    "username": "sheldon"
  },
  {
    "_id": ObjectId("5a934e000102030405000003"),
    "age": 32,
    "key": 4,
    "languages": [
      "c",
      "c++",
      "java",
      "python",
      "dart"
    ],
    "username": "howard"
  },
  {
    "_id": ObjectId("5a934e000102030405000004"),
    "age": 5,
    "key": 5,
    "languages": [
      "c",
      "c++",
      "java",
      "python",
      "dart",
      "ts"
    ],
    "username": "raj"
  }
]

Query

// Match all docs with username `saurabh`

db.collection.aggregate([
  {
    "$match": {
      username: {
        $eq: "saurabh"
      }
    }
  }
])

Result

[
  {
    "_id": ObjectId("5a934e000102030405000000"),
    "age": 18,
    "key": 1,
    "languages": [
      "c",
      "c++"
    ],
    "username": "saurabh"
  }
]

Some of the match operators

  • $gte = Matches if values are greater or equal to the given value.
  • $lte = Matches if values are less or equal to the given value.
  • $lt = Matches if values are less than the given value.
  • $gt = Matches if values are greater than the given value.
  • $eq = Matches values that are equal to the given value.
  • $ne = Matches values that are not equal to the given value.
  • $in = Matches any of the values in an array.
  • $nin = Matches none of the values specified in an array.

3. $skip and $limit aggregation - $skip takes a positive integer that specifies the maximum number of documents to skip. $limit limits the number of documents to look at, by the given number starting from the current positions.

Without skip and limit

Query

// Get all docs with username lexicographically less than or equal to "saurabh"

db.collection.aggregate([
  {
    "$match": {
      username: {
        $lte: "saurabh"
      }
    }
  },
  // ignore this aggregation, for now, we'll look into it later
  {
    $project: {
      "languages": 0,
      "key": 0,
      "id": 0
    }
  }
])

Result

[
  {
    "_id": ObjectId("5a934e000102030405000000"),
    "age": 18,
    "username": "saurabh"
  },
  {
    "_id": ObjectId("5a934e000102030405000001"),
    "age": 22,
    "username": "leonord"
  },
  {
    "_id": ObjectId("5a934e000102030405000003"),
    "age": 32,
    "username": "howard"
  },
  {
    "_id": ObjectId("5a934e000102030405000004"),
    "age": 5,
    "username": "raj"
  }
]
Without skip = 1 and limit = 2

Query

// Skip the first doc ($skip) and return next 2 docs ($limit)

db.collection.aggregate([
  {
    "$match": {
      username: {
        $lte: "saurabh"
      }
    }
  },
  {
    $skip: 1
  },
  {
    $limit: 2
  },
  // ignore this aggregation for now
  {
    $project: {
      "languages": 0,
      "key": 0,
      "id": 0
    }
  }
])

Result

[
  {
    "_id": ObjectId("5a934e000102030405000001"),
    "age": 22,
    "username": "leonord"
  },
  {
    "_id": ObjectId("5a934e000102030405000003"),
    "age": 32,
    "username": "howard"
  }
]

4. $sort aggregation - Sorts all input documents and returns them to the pipeline in sorted order.

1 = sort ascending, -1 = sort descending.

Query

// Get all the docs sorted in ascending order on the `age` field

db.collection.aggregate([
  {
    $sort: {
      age: 1
    }
  },
  // ignore this aggregation for now
  {
    $project: {
      "languages": 0,
      "key": 0,
      "id": 0
    }
  }
])

Result

[
  {
    "_id": ObjectId("5a934e000102030405000003"),
    "age": 32,
    "username": "howard"
  },
  {
    "_id": ObjectId("5a934e000102030405000001"),
    "age": 22,
    "username": "leonord"
  },
  {
    "_id": ObjectId("5a934e000102030405000000"),
    "age": 18,
    "username": "saurabh"
  },
  {
    "_id": ObjectId("5a934e000102030405000002"),
    "age": 14,
    "username": "sheldon"
  },
  {
    "_id": ObjectId("5a934e000102030405000004"),
    "age": 5,
    "username": "raj"
  }
]

5. $unwind aggregation - This is used to unwind documents that are using arrays.

Query

db.collection.aggregate([
  {
    $unwind: "$languages"
  },
])

Result

[
  {
    "_id": ObjectId("5a934e000102030405000000"),
    "age": 18,
    "key": 1,
    "languages": "c",
    "username": "saurabh"
  },
  {
    "_id": ObjectId("5a934e000102030405000000"),
    "age": 18,
    "key": 1,
    "languages": "c++",
    "username": "saurabh"
  },
  {
    "_id": ObjectId("5a934e000102030405000001"),
    "age": 22,
    "key": 2,
    "languages": "c",
    "username": "leonord"
  },
  {
    "_id": ObjectId("5a934e000102030405000001"),
    "age": 22,
    "key": 2,
    "languages": "c++",
    "username": "leonord"
  },
  {
    "_id": ObjectId("5a934e000102030405000001"),
    "age": 22,
    "key": 2,
    "languages": "java",
    "username": "leonord"
  },
  {
    "_id": ObjectId("5a934e000102030405000002"),
    "age": 14,
    "key": 3,
    "languages": "c",
    "username": "sheldon"
  },
  {
    "_id": ObjectId("5a934e000102030405000002"),
    "age": 14,
    "key": 3,
    "languages": "c++",
    "username": "sheldon"
  },
  {
    "_id": ObjectId("5a934e000102030405000002"),
    "age": 14,
    "key": 3,
    "languages": "java",
    "username": "sheldon"
  },
  {
    "_id": ObjectId("5a934e000102030405000002"),
    "age": 14,
    "key": 3,
    "languages": "python",
    "username": "sheldon"
  },
  {
    "_id": ObjectId("5a934e000102030405000003"),
    "age": 32,
    "key": 4,
    "languages": "c",
    "username": "howard"
  },
  {
    "_id": ObjectId("5a934e000102030405000003"),
    "age": 32,
    "key": 4,
    "languages": "c++",
    "username": "howard"
  },
  {
    "_id": ObjectId("5a934e000102030405000003"),
    "age": 32,
    "key": 4,
    "languages": "java",
    "username": "howard"
  },
  {
    "_id": ObjectId("5a934e000102030405000003"),
    "age": 32,
    "key": 4,
    "languages": "python",
    "username": "howard"
  },
  {
    "_id": ObjectId("5a934e000102030405000003"),
    "age": 32,
    "key": 4,
    "languages": "dart",
    "username": "howard"
  },
  {
    "_id": ObjectId("5a934e000102030405000004"),
    "age": 5,
    "key": 5,
    "languages": "c",
    "username": "raj"
  },
  {
    "_id": ObjectId("5a934e000102030405000004"),
    "age": 5,
    "key": 5,
    "languages": "c++",
    "username": "raj"
  },
  {
    "_id": ObjectId("5a934e000102030405000004"),
    "age": 5,
    "key": 5,
    "languages": "java",
    "username": "raj"
  },
  {
    "_id": ObjectId("5a934e000102030405000004"),
    "age": 5,
    "key": 5,
    "languages": "python",
    "username": "raj"
  },
  {
    "_id": ObjectId("5a934e000102030405000004"),
    "age": 5,
    "key": 5,
    "languages": "dart",
    "username": "raj"
  },
  {
    "_id": ObjectId("5a934e000102030405000004"),
    "age": 5,
    "key": 5,
    "languages": "ts",
    "username": "raj"
  }
]

Query

db.collection.aggregate([
  {
    $unwind: "$languages"
  },
  {
    $match: {
      username: "saurabh"
    }
  }
])

Result

[
  {
    "_id": ObjectId("5a934e000102030405000000"),
    "age": 18,
    "key": 1,
    "languages": "c",
    "username": "saurabh"
  },
  {
    "_id": ObjectId("5a934e000102030405000000"),
    "age": 18,
    "key": 1,
    "languages": "c++",
    "username": "saurabh"
  }
]

6. $project aggregation - Get some specific fields from a collection by giving the keys values as 0 (exclude) or 1 (include)

Basic Query

db.collection.aggregate([
  {
    $project: {
      username: 1,
      languages: 1
    }
  },
  {
    $unwind: "$languages"
  },
  {
    $match: {
      username: "saurabh"
    }
  }
])

Result

[
  {
    "_id": ObjectId("5a934e000102030405000000"),
    "languages": "c",
    "username": "saurabh"
  },
  {
    "_id": ObjectId("5a934e000102030405000000"),
    "languages": "c++",
    "username": "saurabh"
  }
]

Query with update column names

db.collection.aggregate([
  {
    $project: {
      "system_username": "$username",
      "system_languages": "$languages"
    }
  },
  {
    $unwind: "$system_languages"
  },
  {
    $match: {
      system_username: "saurabh"
    }
  }
])

Result

[
  {
    "_id": ObjectId("5a934e000102030405000000"),
    "system_languages": "c",
    "system_username": "saurabh"
  },
  {
    "_id": ObjectId("5a934e000102030405000000"),
    "system_languages": "c++",
    "system_username": "saurabh"
  }
]

Follow for more cool articles

Thanks ๐Ÿ˜Ž

ย