From 28bc135e60f6b08ec6273819eb334bee155f10b1 Mon Sep 17 00:00:00 2001 From: junikimm717 <68165832+junikimm717@users.noreply.github.com> Date: Thu, 27 May 2021 19:47:23 -0400 Subject: [PATCH] Sets --- etc/sets.md | 185 +++++++++++++++++++++++++++++++++++++++++++++++++++ lab/Main.cpp | 6 +- lab/Makefile | 2 +- 3 files changed, 191 insertions(+), 2 deletions(-) create mode 100644 etc/sets.md diff --git a/etc/sets.md b/etc/sets.md new file mode 100644 index 0000000..5b4a1de --- /dev/null +++ b/etc/sets.md @@ -0,0 +1,185 @@ +# On Sets (and Maps) + +Sets are a powerful data structure used to reduce the complexities of many +problems that involve some method of efficient search. + +This will cover the basic C++ API for both ordered and unordered sets. + +## gp_hash_table + +Generally, the hash function of the C++ `unordered_set` does not work very well +(the specifics can be found at +[this CF Article](https://codeforces.com/blog/entry/60737)). + +Instead, as the article shows us, we should probably use the `gp_hash_table` +class, which is actually a map. (However, note that the map, in general, has all +of the same functionality that a set has). + +In order to use this class, use the following lines at the top of your code: + +```cpp +#include <ext/pb_ds/assoc_container.hpp> +using namespace __gnu_pbds; +``` + +## Unordered Map + +An unordered map is a data structure that can query for a value given a key in +O(1) time. It can also set a value to a specific key in O(1) time. + +### Inserting elements into a hash table + +```cpp +gp_hash_table<K, V> mp; +mp["key"] = value; +``` + +The two type arguments given to the template (`K` and `V` here) are for the +types of the keys and the values respectively. + +**Merely** using the key "key" with the subscript operator like this will add the +key "key" to the set of keys that `mp` already has. 
+ +### Finding if an element exists + +When a value for a specific key is initialized in a map, it will always be +initialized to a **0-like** value, unless you give it an explicit initialization +as shown above. You might then be tempted to do the following when checking if a +certain key is in the set of keys that the map has: + +```cpp +if (mp["key"]) { + //code +} +``` + +You probably **never want to do this**, as this statement will implicitly add +"key" to the set of keys that the map already has, which is undesirable if +working under tight memory constraints. The correct method is as follows: + +```cpp +if (mp.find("key") != mp.end()) { + // code +} +``` + +`mp.end()` points to the position one **past** the last key in a map (or hash table), +so if the `find` function yields us this value, it means that the key is not +in the hash table. + +## Regular Map + +Although a Hash Table can be quite useful, it is oftentimes limited by the fact +that the objects we need as keys are simply not hashable. If the complexity +allows for it, we can then use a map. A C++ map is a data structure that has +key-value pairs, but locates keys and values by the process of searching for the +keys, which has logarithmic complexity relative to the number of key-value pairs +that the map contains. + +In order to declare a map: + +```cpp +map<K, V> mp; +``` + +The setting and querying methods are the same as before. + +## Applications of a map + +A map is used to store **key-value pairs**, and one can exploit this concept in +several different ways. + +For instance, let's say that you have a vector of a generic type and you want to +construct a frequency table. (That is, a table where I can efficiently query how +many copies of a certain object exist in the vector.) I can construct a map to be able to +efficiently query this, as shown below: + +```cpp +vector<T> v; +/* +Initialize the vector somehow. 
+*/ +map<T, int> freqtable; +for (T x : v) { + freqtable[x]++; +} +``` + +One can take advantage, in the case of number types, of the fact that a map value will +default to 0. However, the following is less concise, but more conceptually +clear: + +```cpp +map<T, int> freqtable; +for (int i = 0; i < v.size(); ++i) { + T x = v[i]; + if (freqtable.find(x) != freqtable.end()) { + freqtable[x]++; + } else { + freqtable[x] = 1; + } +} +``` + +For both examples, the type `T` is merely a placeholder, and you would put +whatever type you need for the task at hand in place of it. + +## Set + +C++ offers a `set` class which can be useful in several applications. + +The advantage of storing data like this is that it provides a way to get the +minimum/maximum of all of the values in a set in logarithmic time. + +To declare a set, use the following: + +```cpp +set<T> s; +``` + +Preferably, `T` is already comparable (as it is painful to create a comparator +for sets). + +### Adding, Removing, and Finding + +In order to add a value to a set: + +```cpp +// s is the set that we declared earlier. +s.insert(value); +``` + +In order to remove a value from a set: + +```cpp +s.erase(value); +``` + +The `find` function for a set works the same way as for the data structures covered above. + +### Finding maximum/minimum. + +There are two main functions that are used, `begin()` and `end()`. + +#### First element + +Note that `begin()` returns a pointer to the minimum element rather than the +element itself, so you probably want to do the following: + +```cpp +T first_element = *(s.begin()); +``` + +Pointers are a more advanced topic, but at a basic level, `s.begin()` is telling +us _where_ in memory the first element is, and the asterisk allows us to know +_what_ the first element actually is, based on its location. + +#### Last element + +As we have covered before, the `end()` method returns the pointer **one after** +the last value. 
As a result, we need to decrement the pointer that it yields (by +one) in order to get our desired result: + +```cpp +T last_element = *(--s.end()); +``` diff --git a/lab/Main.cpp b/lab/Main.cpp index a77ae6a..6d3ee02 100644 --- a/lab/Main.cpp +++ b/lab/Main.cpp @@ -3,5 +3,9 @@ using namespace std; int main() { ios_base::sync_with_stdio(0); cin.tie(0); - + map<int, int> mp; + mp[0] = 1; + mp[1] = 1; + mp[2] = 1; + cout << mp.begin()->first; } diff --git a/lab/Makefile b/lab/Makefile index b29f3b9..ec34916 100644 --- a/lab/Makefile +++ b/lab/Makefile @@ -1,2 +1,2 @@ -CXX=g++-10 +CXX=g++-11 CXXFLAGS=-std=c++11 -O2 -Wall -Wextra -Wshadow -D_GLIBCXX_DEBIG -fsanitize=address -fsanitize=undefined