今天我们来讲一种特殊的二叉树,二叉搜索树。学好二叉搜索树,有助于我们之后对map和set的理解与掌握。
二叉搜索树又称二叉排序树,它或者是一棵空树,或者是具有以下性质的二叉树:
对于二叉搜索树的大部分操作,我们大都有通过循环和递归两种方式来实现。不过递归的栈消耗较大,虽然代码比较简洁,我依旧推崇非递归的写法。
查找比较简单,因为二叉搜索树鲜明的特性,我们只要不断将待查找节点与树中的节点比较即可:
1.循环法
Node* find(const K& key) { if (_root == nullptr) { return false; } Node* cur = _root; while (cur) { if (cur->_key < key) { cur->_right; } else if(cur->_key>key) { cur->_left; } else { return cur; } } return nullptr; }
Node* _findR(Node* root, const K& key) { if (root == nullptr) { return nullptr; } else { if (cur->_key < key) { return _findR(cur->_right, key); } else if (cur->_key > key) { return _findR(cur->_right, key); } else { return cur; } } }
插入的思路分为两步:第一不是找到要插入的位置,然后创建节点进行插入。
循环写法:
//插入 bool insert(const K& key) { if (_root == nullptr) { _root = new Node(key); return true; } Node* parent = nullptr; Node* cur = _root; while (cur) { if (cur->_key < key) { parent = cur; cur = cur->_right; } else if (cur->_key > key) { parent = cur; cur = cur->_left; } else { return false; } } cur = new Node(key); if (cur->_key > parent->_key) { parent->_right = cur; } else { parent->_left = cur; } return true; }
递归写法:
//递归版插入 bool _insertR(Node* &root,const K& key) { if (root == nullptr) { root = new Node(key); return true; } Node* cur = root; else{ if (cur->_key > key) { return insertR(cur->_left, key); } else if(cur->_key<key){ return insertR(cur->_right, key); } else { return false; } } }
删除是二叉排序树的一个难点,我们删除的同时不能破坏其性质,我们对于不同的情况要具体分析:
对于要被删除的节点:
没有子节点(叶子节点)
这种情况最简单,直接删除即可,不会影响其他节点
有一个节点
这种情况下,我们将待删节点的父节点 指向 待删节点的子节点即可,然后再讲待删节点删掉。
循环写法:
bool erase(const K& key) { Node* parent = nullptr; Node* cur = _root; while (cur) { if (cur->_key < key) { parent = cur; cur = cur->_right; } else if (cur->_key > key) { parent = cur; cur = cur->_left; } else { if (cur->_left == nullptr) { if (cur == _root) { _root = cur->_right; } else { if (parent->_left == cur) { parent->_left = cur->_right; } else { parent->_right = cur->_right; } } delete cur; } else if (cur->_right == nullptr) { if (cur == _root) { _root = cur->_left; } else { if (parent->_left == cur) { parent->_left = cur->_left; } else { parent->_right = cur->_left; } } delete cur; } else { Node* minRightParent = cur; Node* minRight = cur->_right; while (minRight->_left) { minRightParent = minRight; minRight = minRight->_left; } cur->_key = minRight->_key; if (minRightParent->_left == minRight) { minRightParent->_left = minRight->_right; } else { minRightParent->_right = minRight->_right; } delete minRight; } return true; } } return false; }
递归写法:
bool _eraseR(Node* &root, const K& key) { if (root == nullptr) return false; if (root->_key < key) { return _eraseR(root->_right, key); } else if (root->_key > key) { return _eraseR(root->_left, key); } else { Node* del = root; if (root->_left == nullptr) { root = root->_right; } else if (root->_right == nullptr) { root = root->_left; } else { Node* minRight = root->_right; while (minRight->_left) { minRight = minRight->_left; } root->_key = minRight->_key; return _eraseR(root->_right, minRight->_key); } delete del; return true; } }
对于一个完整的二叉搜索树。我们还要加上拷贝构造和赋值拷贝以及销毁,析构函数等等。这里不再一一赘述了,直接贴出完整的代码:
#pragma once namespace yyk { template<class K> struct BSTreeNode { BSTreeNode<K>* _left; BSTreeNode<K>* _right; K _key; BSTreeNode(const K& key) :_left(nullptr), _right(nullptr), _key(key) {} }; template<class K> class BSTree { typedef BSTreeNode<K> Node; public: BSTree() :_root(nullptr) {} //拷贝构造 BSTree(const BSTree<K>& t) { _root = _copyR(t._root); } // 赋值拷贝 BSTree<K>& operator=(BSTree<K> t) { std::swap(_root, t._root); return *this; } //析构函数 ~BSTree() { _Destroy(_root); } //插入 bool insert(const K& key) { if (_root == nullptr) { _root = new Node(key); return true; } Node* parent = nullptr; Node* cur = _root; while (cur) { if (cur->_key < key) { parent = cur; cur = cur->_right; } else if (cur->_key > key) { parent = cur; cur = cur->_left; } else { return false; } } cur = new Node(key); if (cur->_key > parent->_key) { parent->_right = cur; } else { parent->_left = cur; } return true; } //查询 Node* find(const K& key) { if (_root == nullptr) { return false; } Node* cur = _root; while (cur) { if (cur->_key < key) { cur->_right; } else if(cur->_key>key) { cur->_left; } else { return cur; } } return nullptr; } //删除 bool erase(const K& key) { Node* parent = nullptr; Node* cur = _root; while (cur) { if (cur->_key < key) { parent = cur; cur = cur->_right; } else if (cur->_key > key) { parent = cur; cur = cur->_left; } else { if (cur->_left == nullptr) { if (cur == _root) { _root = cur->_right; } else { if (parent->_left == cur) { parent->_left = cur->_right; } else { parent->_right = cur->_right; } } delete cur; } else if (cur->_right == nullptr) { if (cur == _root) { _root = cur->_left; } else { if (parent->_left == cur) { parent->_left = cur->_left; } else { parent->_right = cur->_left; } } delete cur; } else { Node* minRightParent = cur; Node* minRight = cur->_right; while (minRight->_left) { minRightParent = minRight; minRight = minRight->_left; } cur->_key = minRight->_key; if (minRightParent->_left == minRight) { minRightParent->_left = minRight->_right; } else { minRightParent->_right = minRight->_right; } delete minRight; } return true; } } return false; } void InOrder() { _InOrder(_root); cout << endl; } void insertR(const K& key) { _insertR(_root,key); } void findR(const K& key) { _findR(_root, key); } void eraseR(const K& key) { _eraseR(_root, key); } private: //中序遍历 void _InOrder(Node* root) { if (root == nullptr) { return; } _InOrder(root->_left); cout << root->_key << " "; _InOrder(root->_right); } //递归版插入 bool _insertR(Node* &root,const K& key) { if (root == nullptr) { root = new Node(key); return true; } Node* cur = root; else{ if (cur->_key > key) { return insertR(cur->_left, key); } else if(cur->_key<key){ return insertR(cur->_right, key); } else { return false; } } } //递归版查找 Node* _findR(Node* root, const K& key) { if (root == nullptr) { return nullptr; } else { if (cur->_key < key) { return _findR(cur->_right, key); } else if (cur->_key > key) { return _findR(cur->_right, key); } else { return cur; } } } //递归版删除 bool _eraseR(Node* &root, const K& key) { if (root == nullptr) return false; if (root->_key < key) { return _eraseR(root->_right, key); } else if (root->_key > key) { return _eraseR(root->_left, key); } else { Node* del = root; if (root->_left == nullptr) { root = root->_right; } else if (root->_right == nullptr) { root = root->_left; } else { Node* minRight = root->_right; while (minRight->_left) { minRight = minRight->_left; } root->_key = minRight->_key; return _eraseR(root->_right, minRight->_key); } delete del; return true; } } //递归版拷贝函数 Node* _copyR(Node* root) { if (root == nullptr) { return nullptr; } Node* newRoot = new Node(root->_key); newRoot->_left = _copyR(root->_left); newRoot->_right = _copyR(_root->_right); return newRoot; } //递归版销毁函数 void _Destroy(Node* root) { if (root == nullptr) { return; } _Destroy(root->_left); _Destroy(root->_right); delete root; } Node* _root = nullptr; }; }
namespace KEY_VALUE { template<class K, class V> struct BSTreeNode { BSTreeNode<K, V>* _left; BSTreeNode<K, V>* _right; K _key; V _value; BSTreeNode(const K& key, const V& value) : _left(nullptr) , _right(nullptr) , _key(key) , _value(value) {} }; template<class K, class V> class BSTree { typedef BSTreeNode<K, V> Node; public: V& operator[](const K& key) { pair<Node*, bool> ret = Insert(key, V()); return ret.first->_value; } pair<Node*, bool> Insert(const K& key, const V& value) { if (_root == nullptr) { _root = new Node(key, value); return make_pair(_root, true); } // 查找要插入的位置 Node* parent = nullptr; Node* cur = _root; while (cur) { if (cur->_key < key) { parent = cur; cur = cur->_right; } else if (cur->_key > key) { parent = cur; cur = cur->_left; } else { return make_pair(cur, false); } } cur = new Node(key, value); if (parent->_key < cur->_key) { parent->_right = cur; } else { parent->_left = cur; } return make_pair(cur, true); } Node* Find(const K& key) { Node* cur = _root; while (cur) { if (cur->_key < key) { cur = cur->_right; } else if (cur->_key > key) { cur = cur->_left; } else { return cur; } } return nullptr; } bool Erase(const K& key) { Node* parent = nullptr; Node* cur = _root; while (cur) { if (cur->_key < key) { parent = cur; cur = cur->_right; } else if (cur->_key > key) { parent = cur; cur = cur->_left; } else { // 删除 if (cur->_left == nullptr) { if (cur == _root) { _root = cur->_right; } else { if (cur == parent->_left) { parent->_left = cur->_right; } else { parent->_right = cur->_right; } } delete cur; } else if (cur->_right == nullptr) { if (cur == _root) { _root = cur->_left; } else { if (cur == parent->_left) { parent->_left = cur->_left; } else { parent->_right = cur->_left; } } delete cur; } else { // 找到右树最小节点去替代删除 Node* minRightParent = cur; Node* minRight = cur->_right; while (minRight->_left) { minRightParent = minRight; minRight = minRight->_left; } cur->_key = minRight->_key; if (minRight == minRightParent->_left) minRightParent->_left = minRight->_right; else minRightParent->_right = minRight->_right; delete minRight; } return true; } } return false; } void InOrder() { _InOrder(_root); cout << endl; } private: void _InOrder(Node* root) { if (root == nullptr) { return; } _InOrder(root->_left); cout << root->_key << ":"<<root->_value<<endl; _InOrder(root->_right); } private: Node* _root = nullptr; }; }
KEY_VALUE 的两个应用:
void TestBSTree2() { KEY_VALUE::BSTree<string, string> dict; dict.Insert("sort", "排序"); dict.Insert("insert", "插入"); dict.Insert("tree", "树"); dict.Insert("right", "右边"); // ... string str; while (cin >> str) { if (str == "Q") { break; } else { auto ret = dict.Find(str); if (ret == nullptr) { cout << "拼写错误,请检查你的单词" << endl; } else { cout << ret->_key << "->" << ret->_value << endl; } } } }
void TestBSTree3() { // 统计字符串出现次数,也是经典key/value string str[] = { "sort", "sort", "tree", "insert", "sort", "tree", "sort", "test", "sort" }; KEY_VALUE::BSTree<string, int> countTree; //方法一: for (auto& e : str) { auto ret = countTree.Find(e); if (ret == nullptr) { countTree.Insert(e, 1); } else { ret->_value++; } } // 方法二: /*for (auto& e : str) { countTree[e]++; }*/ countTree.InOrder(); }
插入和删除操作都必须先查找,查找效率代表了二叉搜索树中各个操作的性能。
对有n个结点的二叉搜索树,若每个元素查找的概率相等,则二叉搜索树平均查找长度是结点在二叉搜索树的深度的函数,即结点越深,则比较次数越多。
但对于同一个关键码集合,如果各关键码插入的次序不同,可能得到不同结构的二叉搜索树:
最优情况下,二叉搜索树为完全二叉树,其平均比较次数为:log2 N
最差情况下,二叉搜索树退化为单支树,其平均比较次数为 N/2
问题:如果退化成单支树,二叉搜索树的性能就失去了。那能否进行改进,不论按照什么次序插入关键码,都可以是二叉搜索树的性能最佳?答案是存在的,但是我们之后再介绍。