文档库 最新最全的文档下载
当前位置:文档库 › 数据库-实验4-数据库操作的实现算法-B树-索引查找

数据库-实验4-数据库操作的实现算法-B树-索引查找

数据库-实验4-数据库操作的实现算法-B树-索引查找
数据库-实验4-数据库操作的实现算法-B树-索引查找

实验四数据库操作的实现算法

1、实验目的:掌握B树索引查找算法,多路归并排序算法,并用高级语言实现

2、实验内容:

选择熟悉的高级语言设计和实现程序完成下列功能:

1)随机生成具有1,000,000条记录的文本文件,每条记录的长度为128字节,其中固定包含一个整型属性A,属性A的值随机生成。其他属性可以自己定义。

2)针对属性A,用高级语言实现两趟多路归并排序算法。要求在内存分配8M空间用于外部归并排序

3)以属性A为键值,实现B树索引。完成索引的插入,删除和查找。

3、实验报告

截屏给出实验结果

给出算法流程图

由于共有1000000个元组,每个元组128B,因此总大小为128M,又内存限制为8M,则至少要分成128M/8M=16组。为了尽可能的减少I/O次数,因此采用16路归并的方法;为了减少归并的时间,采用败者树,在O(lgn)的时间复杂度内选出最小的A属性元组,这种方法从划分到归并,读入两次,写出两次,最少的I/O次数是4*1000000.

附上程序代码

1、生成1000000个128字节的元组代码:

#include

#include

#include

#include

using namespace std;

const int N = 1000000;

string RandomChineseCharacters()

{

//srand( (unsigned)time(NULL));

int high = 0xd7 - 0xc1;//16-55区汉字

int low= 0xfe - 0xa1;

int high_zn ;

int low_zn;

char name[3];

name[2] = '\0';

string s;

for(int i = 0; i < 60; i++)

{

high_zn = rand()%high + 0xc1;

low_zn = rand()%low + 0xa1;

name[0]=high_zn;

name[1]=low_zn;

s.append(name);

}

return s;

}

int main()

{

ofstream ofp ("example2.txt");

if (!ofp.is_open())

{

cout<<"can't open file!"<

return 0;

}

srand( (unsigned)time(NULL));

int t1 = 0,t2 = 0;//4+

string name;

for(int i = 1;i <= N;i++)

{

t1 = rand();

t2 = i;

name = RandomChineseCharacters();

//cout<

ofp<

}

return 0;

}

2、16组快速排序和16路归并排序的代码

// Extenal_Merge_Sort.cpp : 定义控制台应用程序的入口点。//

#include "stdafx.h"

#include

#include

#include

#include

#include

#include

using namespace std;

typedef struct Data

{

int data;

int num;

char name[121];

} Data;

//利用败者树

const int N = 1000000;//数据总量

const int FILE_NUM = 16;//文件个数

const int MAX_PART = 62500;//每一个文件大小

FILE *fpreads[FILE_NUM];

const int MIN = -1; //最小值,必须比要排序数字的最小值要小,否则出错const int MAX = 9999999; //最大值,必须比要排序数字的最大值要大,否则出错

int result = 0;

int cmp(const void* a, const void *b)

{

if ((*(Data*)a).data > (*(Data *)b).data)

return 1;

else if ((*(Data*)a).data < (*(Data *)b).data)

return -1;

else

return 0;

}

//从unsort_data.txt中读取数据

int read_data(FILE *fp, Data *array, int N)

{

int length = 0;

for (int i = 0; i < MAX_PART && (EOF !=

fscanf_s(fp, "%d %d %s\n", &array[i].data, &array[i].num, array[i].name,_countof(array[i].name))); i++)

{

length++;

}

return length;

}

//打开data0.txt - data9.txt这10个文件

FILE* open_file(int count, char *mode)

{

FILE *fpwrite = NULL;

char filename[20];

memset(filename, 0, 20);

sprintf_s(filename,20, "data%d.txt", count);

fopen_s(&fpwrite,filename, mode);

assert(fpwrite != NULL);

return fpwrite;

}

//向data0.txt - data9.txt这10个文件写入排好序的数据

void write_data(Data *array, int N, int count)

{

FILE *fpwrite = open_file(count, "w");

int i = 0;

for (i = 0; i < N; i++)

{

fprintf(fpwrite, "%d %d %s\n", array[i].data, array[i].num, array[i].name);

}

fprintf(fpwrite, "%d %d %s\n", MAX, array[i - 1].num, array[i-1].name); //在每个文件最后写入一个最大值,表示文件结束

fclose(fpwrite);

}

//内部排序,调用16次快速排序,产生data0.txt - data16.txt这10个有序文件

void interior_sort(void)

{

clock_t begin = clock();

FILE *fpread = NULL;

fopen_s(&fpread,"unsort_data.txt", "r");

assert(fpread != NULL);

int count = 0;

Data *array = new Data[MAX_PART];

assert(array != NULL);

while (1)

{

memset(array, 0, sizeof(Data)* MAX_PART);

int length = read_data(fpread, array, MAX_PART);

if (length == 0)

{

break;

}

qsort(array, length, sizeof(Data), cmp);

write_data(array, length, count);

count++;

}

delete[] array;

fclose(fpread);

clock_t end = clock();

cout << "16 times Quick Sort Use Time " << (end - begin) / CLK_TCK <<

"s" << endl;

}

//调整

void adjust(int ls[], Data data[], int s) {

int t = (s + FILE_NUM) / 2;

while (t)

{

if (data[s].data > data[ls[t]].data)

{

int temp = s;

s = ls[t];

ls[t] = temp;

}

t /= 2;

}

ls[0] = s;

}

void create_loser_tree(int ls[], Data data[]) {

data[FILE_NUM].data = MIN;

for (int i = 0; i < FILE_NUM; i++)

{

ls[i] = FILE_NUM;

}

for (int i = FILE_NUM - 1; i >= 0; i--) {

adjust(ls, data, i);

}

}

void merge_sort_by_losertree()

{

clock_t begin = clock();

FILE *fpreads[FILE_NUM]; //10个文件的描述符

Data data[FILE_NUM + 1]; //10个文件的10个当前最小数据

int ls[FILE_NUM]; //存放败者索引的节点

int index;

FILE *fpwrite = NULL;

fopen_s(&fpwrite,"sort_data_by_losertree.txt", "w");

assert(fpwrite != NULL);

for (int i = 0; i < FILE_NUM; i++)

{

fpreads[i] = open_file(i, "r");

}

for (int i = 0; i < FILE_NUM; i++)

{

fscanf_s(fpreads[i], "%d %d %s\n", &data[i].data, &data[i].num, data[i].name,_countof(data[i].name));

}

create_loser_tree(ls, data); //创建败者树

while (data[ls[0]].data != MAX)

{

index = ls[0];

fprintf(fpwrite, "%d %d %s\n", data[index].data, data[index].num, data[index].name);

result++;//测试数据是否全部读完。

fscanf_s(fpreads[index], "%d %d %s\n", &data[index].data, &data[index].num, data[index].name, _countof(data[index].name));

adjust(ls, data, index);

}

for (int i = 0; i < FILE_NUM; i++)

{

fclose(fpreads[i]);

}

fclose(fpwrite);

clock_t end = clock();

cout << "16 Path Merge Sort Use Time :" << (end - begin) / CLK_TCK << "s" << endl;

}

int _tmain(int argc, _TCHAR* argv[])

{

interior_sort();

merge_sort_by_losertree();

cout << "Merged Data:" << result << endl;

getchar();

return 0;

}

3、B树索引的建立、查找、删除的代码

#include "stdafx.h"

#include

#include

#include

#include

/**

* @brief the degree of btree

* key per node: [M-1, 2M-1]

* child per node: [M, 2M]

*/

#define M 2

#define MaxSize 1000001 typedef struct btree_node {

int k[2 * M - 1];

struct btree_node *p[2 * M];

int num;

bool is_leaf;

} btree_node;

/**

* @brief allocate a new btree node * default: is_leaf == true

*

* @return pointer of new node

*/

btree_node *btree_node_new();

/**

* @brief create a btree root

*

* @return pointer of btree root

*/

btree_node *btree_create();

/**

* @brief split child if num of key in child exceed 2M-1

*

* @param parent: parent of child

* @param pos: p[pos] points to child

* @param child: the node to be splited

*

* @return

*/

int btree_split_child(btree_node *parent, int pos, btree_node *child);

/**

* @brief insert a value into btree

* the num of key in node less than 2M-1

*

* @param node: tree root

* @param target: target to insert

*/

void btree_insert_nonfull(btree_node *node, int target);

/**

* @brief insert a value into btree

*

* @param root: tree root

* @param target: target to insert

*

* @return: new root of tree

*/

btree_node* btree_insert(btree_node *root, int target);

/**

* @brief merge y, z and root->k[pos] to left

* this appens while y and z both have M-1 keys

*

* @param root: parent node

* @param pos: postion of y

* @param y: left node to merge

* @param z: right node to merge

*/

void btree_merge_child(btree_node *root, int pos, btree_node *y, btree_node *z);

/**

* @brief delete a vlue from btree

*

* @param root: btree root

* @param target: target to delete

*

* @return: new root of tree

*/

btree_node *btree_delete(btree_node *root, int target);

/**

* @brief delete a vlue from btree

* root has at least M keys

*

* @param root: btree root

* @param target: target to delete

*

* @return

*/

void btree_delete_nonone(btree_node *root, int target);

/**

* @brief find the rightmost value

*

* @param root: root of tree

*

* @return: the rightmost value

*/

int btree_search_predecessor(btree_node *root);

/**

* @brief find the leftmost value

*

* @param root: root of tree

*

* @return: the leftmost value

*/

int btree_search_successor(btree_node *root);

/**

* @brief shift a value from z to y

*

* @param root: btree root

* @param pos: position of y

* @param y: left node

* @param z: right node

*/

void btree_shift_to_left_child(btree_node *root, int pos, btree_node *y, btree_node *z);

/**

* @brief shift a value from z to y

*

* @param root: btree root

* @param pos: position of y

* @param y: left node

* @param z: right node

*/

void btree_shift_to_right_child(btree_node *root, int pos, btree_node *y, btree_node *z);

/**

* @brief inorder traverse the btree

*

* @param root: root of treee

*/

void btree_inorder_print(btree_node *root);

/**

* @brief level print the btree

*

* @param root: root of tree

*/

void btree_level_display(btree_node *root);

btree_node *btree_node_new()

{

btree_node *node = (btree_node *)malloc(sizeof(btree_node));

if (NULL == node) {

return NULL;

}

for (int i = 0; i < 2 * M - 1; i++) {

node->k[i] = 0;

}

for (int i = 0; i < 2 * M; i++) {

node->p[i] = NULL;

}

node->num = 0;

node->is_leaf = true;

}

btree_node *btree_create()

{

btree_node *node = btree_node_new();

if (NULL == node) {

return NULL;

}

return node;

}

int btree_split_child(btree_node *parent, int pos, btree_node *child) {

btree_node *new_child = btree_node_new();

if (NULL == new_child) {

return -1;

}

new_child->is_leaf = child->is_leaf;

new_child->num = M - 1;

for (int i = 0; i < M - 1; i++) {

new_child->k[i] = child->k[i + M];

}

if (false == new_child->is_leaf) {

for (int i = 0; i < M; i++) {

new_child->p[i] = child->p[i + M];

}

}

child->num = M - 1;

for (int i = parent->num; i > pos; i--) {

parent->p[i + 1] = parent->p[i];

}

parent->p[pos + 1] = new_child;

for (int i = parent->num - 1; i >= pos; i--) {

parent->k[i + 1] = parent->k[i];

}

parent->k[pos] = child->k[M - 1];

parent->num += 1;

}

void btree_insert_nonfull(btree_node *node, int target) {

if (1 == node->is_leaf) {

int pos = node->num;

while (pos >= 1 && target < node->k[pos - 1]) { node->k[pos] = node->k[pos - 1];

pos--;

}

node->k[pos] = target;

node->num += 1;

}

else {

int pos = node->num;

while (pos > 0 && target < node->k[pos - 1]) { pos--;

}

if (2 * M - 1 == node->p[pos]->num) {

btree_split_child(node, pos, node->p[pos]);

if (target > node->k[pos]) {

pos++;

}

}

btree_insert_nonfull(node->p[pos], target);

}

}

btree_node* btree_insert(btree_node *root, int target) {

if (NULL == root) {

return NULL;

}

if (2 * M - 1 == root->num) {

btree_node *node = btree_node_new();

if (NULL == node) {

return root;

}

node->is_leaf = 0;

node->p[0] = root;

btree_split_child(node, 0, root);

btree_insert_nonfull(node, target);

return node;

}

else {

btree_insert_nonfull(root, target);

return root;

}

}

void btree_merge_child(btree_node *root, int pos, btree_node *y, btree_node *z) {

y->num = 2 * M - 1;

for (int i = M; i < 2 * M - 1; i++) {

y->k[i] = z->k[i - M];

}

y->k[M - 1] = root->k[pos];

if (false == z->is_leaf) {

for (int i = M; i < 2 * M; i++) {

y->p[i] = z->p[i - M];

}

}

for (int j = pos + 1; j < root->num; j++) {

root->k[j - 1] = root->k[j];

root->p[j] = root->p[j + 1];

}

root->num -= 1;

free(z);

}

相关文档