Last Updated:

Binary files – how to work with them?

Text files store data as characters. If, for example, you write the number 12345678 as text, it takes 8 bytes – one per digit – whereas the same value stored as a 4-byte int takes only 4. As the values grow and multiply, text storage requires more and more memory.

However, this can be avoided by using binary files, which the C language supports directly: values are stored in the same representation they have in memory. This article describes how to work with such files and how to convert text data into this more compact format.

The easiest way to understand how binary files work is to look at examples. Here is a simple one:

#include <conio.h> ?
#include <stdio.h>
#include <stdlib.h>

#define ERROR_FILE_OPEN -3

/*
 * Reads one integer from standard input and stores its raw in-memory
 * representation (sizeof(int) bytes) in D:/c/output.bin.
 *
 * Exits with ERROR_FILE_OPEN if the file cannot be opened, returns
 * EXIT_FAILURE if the number cannot be read or written, 0 otherwise.
 */
int main(void) {
    FILE *output = NULL;
    int number;
    int status = EXIT_FAILURE;

    output = fopen("D:/c/output.bin", "wb");
    if (output == NULL) {
        printf("Error opening file");
        _getch();
        exit(ERROR_FILE_OPEN);
    }

    /* Never write an indeterminate value when the input is not a number. */
    if (scanf("%d", &number) != 1) {
        printf("Error reading number");
    } else if (fwrite(&number, sizeof number, 1, output) != 1) {
        /* fwrite returns the number of items actually written. */
        printf("Error writing file");
    } else {
        status = 0;
    }

    fclose(output);
    _getch();
    return status;
}

After running this program, you can view the contents of the binary file. The application used to open the file must be able to display the data in hexadecimal (a hex editor); in an ordinary text editor the bytes will not look like a readable number. Data is written to a .bin file with the fwrite function:

size_t fwrite ( const void * ptr, size_t size, size_t count, FILE * stream );

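The function writes count elements, each size bytes long, taken from the memory at address ptr, to the stream, and returns the number of elements successfully written. Its counterpart for reading is fread, which takes the same parameters: it reads up to count elements into the memory at address ptr and returns the number of elements successfully read. To read the data back, use the following program: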

#include <conio.h> ?
#include <stdio.h>
#include <stdlib.h>

#define ERROR_FILE_OPEN -3

/*
 * Reads one int in raw binary form from D:/c/output.bin and prints it.
 *
 * Exits with ERROR_FILE_OPEN if the file cannot be opened, returns
 * EXIT_FAILURE if the file holds fewer than sizeof(int) bytes, 0 otherwise.
 */
int main(void) {
    FILE *input = NULL;
    int number;
    int status = 0;

    input = fopen("D:/c/output.bin", "rb");
    if (input == NULL) {
        printf("Error opening file");
        _getch();
        exit(ERROR_FILE_OPEN);
    }

    /* fread returns the number of complete items read; print only a value
       that was really read from the file. */
    if (fread(&number, sizeof number, 1, input) == 1) {
        printf("%d", number);
    } else {
        printf("Error reading file");
        status = EXIT_FAILURE;
    }

    fclose(input);
    _getch();
    return status;
}

To work with binary files effectively, it is worth studying the file-positioning functions in advance. The most commonly used one is fseek. Here is its prototype:

int fseek ( FILE * stream, long int offset, int origin );

Origin in this structure can take only 3 values:

  • SEEK_SET – the beginning of the file; offset is counted from the first byte;
  • SEEK_CUR – the current position of the file pointer; offset is counted from the place where the previous read or write stopped;
  • SEEK_END – the end of the file; offset is counted from the end (so it is usually zero or negative). Note that the C standard does not require binary streams to support SEEK_END meaningfully, so code relying on it is not fully portable.

If the function succeeds, it returns 0; a non-zero value indicates an error.

We can extend the example above. This time the number is written to the file, the position is moved back to the very beginning of the file, and the number is read back to verify it. Here's what it looks like:

#include <conio.h>
#include <stdio.h>
#include <stdlib.h>

#define ERROR_FILE_OPEN -3

/*
 * Writes one int read from standard input to D:/c/output.bin, seeks back to
 * the start of the file, reads the value back and prints it.
 *
 * Exits with ERROR_FILE_OPEN if the file cannot be opened, returns
 * EXIT_FAILURE if any input or file operation fails, 0 otherwise.
 */
int main(void) {
    FILE *iofile = NULL;
    int number;
    int status = EXIT_FAILURE;

    iofile = fopen("D:/c/output.bin", "w+b");
    if (iofile == NULL) {
        printf("Error opening file");
        _getch();
        exit(ERROR_FILE_OPEN);
    }

    if (scanf("%d", &number) != 1) {
        printf("Error reading number");
    } else if (fwrite(&number, sizeof number, 1, iofile) != 1
               /* On an update stream a positioning call is required between
                  a write and the following read. */
               || fseek(iofile, 0, SEEK_SET) != 0) {
        printf("Error writing file");
    } else {
        number = 0; /* prove that the value printed below comes from the file */
        if (fread(&number, sizeof number, 1, iofile) == 1) {
            printf("%d", number);
            status = 0;
        } else {
            printf("Error reading file");
        }
    }

    fclose(iofile);
    _getch();
    return status;
}

Instead of fseek(iofile, 0, SEEK_SET), you can also use the rewind function, which moves the file position back to the very beginning of the file.

The following function stores the current file position in the variable pointed to by pos:

int fgetpos ( FILE * stream, fpos_t * pos );

This function moves the file position to the one previously stored in the pos variable:

int fsetpos ( FILE * stream, const fpos_t * pos );

The following function returns the current position of the indicator relative to the beginning of the file:

long int ftell ( FILE * stream );

Here's an example. The user enters integers one after another. The first sizeof(int) bytes of the file (4 on typical platforms) store how many numbers were entered. A placeholder is written there first; when the input ends, the program returns to the beginning of the file and writes the actual count:

#include <stdio.h>
#include <conio.h>
#include <stdlib.h>

#define ERROR_OPEN_FILE -3

/*
 * Stores a sequence of ints entered by the user in D:/c/numbers.bin.
 * Layout of the file: an unsigned count followed by that many ints.
 *
 * The count is written twice: once as a zero placeholder to reserve the
 * space, and once more at the end with the real value.
 */
int main(void) {
    FILE *iofile = NULL;
    unsigned counter = 0;
    int num;
    int yn;

    iofile = fopen("D:/c/numbers.bin", "w+b");
    if (iofile == NULL) {
        printf("Error opening file");
        getch();
        exit(ERROR_OPEN_FILE);
    }

    /* Reserve room for the counter at the start of the file. */
    fwrite(&counter, sizeof counter, 1, iofile);
    for (;;) {
        printf("enter new number? [1 - yes, 2 - no]");
        /* Unreadable input (EOF, letters) is treated as "no"; otherwise yn
           would be used while indeterminate. */
        if (scanf("%d", &yn) != 1 || yn != 1) {
            break;
        }
        if (scanf("%d", &num) != 1) {
            break;
        }
        if (fwrite(&num, sizeof num, 1, iofile) != 1) {
            printf("Error writing file");
            break;
        }
        counter++;
    }

    /* Go back and replace the placeholder with the real count. */
    rewind(iofile);
    fwrite(&counter, sizeof counter, 1, iofile);

    fclose(iofile);
    getch();
    return 0;
}

The second example reads the number of stored elements and then displays the numbers in order:

#include <stdio.h>
#include <conio.h>
#include <stdlib.h>

#define ERROR_OPEN_FILE -3

/*
 * Prints the ints stored in D:/c/numbers.bin, one per line.
 * Expected layout: an unsigned count followed by that many ints.
 */
int main(void) {
    FILE *iofile = NULL;
    unsigned counter = 0;
    unsigned i; /* same type as counter: avoids a signed/unsigned comparison */
    int num;

    iofile = fopen("D:/c/numbers.bin", "rb");
    if (iofile == NULL) {
        printf("Error opening file");
        getch();
        exit(ERROR_OPEN_FILE);
    }

    /* An empty or truncated header means there is nothing to print. */
    if (fread(&counter, sizeof counter, 1, iofile) != 1) {
        counter = 0;
    }
    for (i = 0; i < counter; i++) {
        /* Stop at the first short read instead of printing a stale value. */
        if (fread(&num, sizeof num, 1, iofile) != 1) {
            break;
        }
        printf("%d\n", num);
    }

    fclose(iofile);
    getch();
    return 0;
}

Now for some more complex examples with specific goals. The first one converts text data to a binary format using a "wrapper" function. The wrapper takes the file name, the access mode, a function to execute once the file has been opened, and the arguments for that function.

Since there may be many arguments of different types, they are passed as a pointer to a structure. After the function has run, the wrapper closes the file itself, so the caller does not have to remember to release it. Here's the code:

#include <stdio.h>
#include <conio.h>
#include <stdlib.h>
#define DEBUG

/* debug(data): prints the string `data` with printf when DEBUG is defined and
   expands to nothing otherwise. The expansion already ends in ';', so a call
   may be written without a trailing semicolon. */
#ifdef DEBUG
#define debug(data) printf("%s", data);
#else
#define debug(data)
#endif

/* Path of the text file that doStuff opens for reading. */
const char inputFile[] = "D:/c/xinput.txt";
/* Path of the file that readAndCallback opens for writing. */
const char outputFile[] = "D:/c/output.bin";

/* Argument bundle handed to a withOpenFile callback through void*. */
struct someArgs {
    int    *items;   /* array of values */
    size_t  number;  /* number of elements in items */
};

int writeToFile(FILE *file, void* args) {
size_t i;
struct someArgs *data = (struct someArgs*) args;
debug("write to file\n")
fwrite(data->items, sizeof(int), data->number, file);
debug("write finished\n")
return 0;
}

int readAndCallback(FILE *file, void* args) {
struct someArgsdata;
size_t size, i = 0;
int result;
debug("read from file\n")
fscanf(file, "%d", &size);
data.items = (int*) malloc(size*sizeof(int));
data.number = size;
while (!feof(file)) {
fscanf(file, "%d", &data.items[i]);
i++;
}
debug("call withOpenFile\n")
result = withOpenFile(outputFile, "w", writeToFile, &data);
debug("read finish\n")
free(data.items);
return result;
}

/*
 * Opens inputFile for reading and runs readAndCallback on it.
 * Returns the result of withOpenFile.
 */
int doStuff(void) {
    /* Block-scope prototype: withOpenFile is defined further down the file. */
    int withOpenFile(const char *filename, const char *mode,
                     int (*fun)(FILE* source, void* args), void* args);

    return withOpenFile(inputFile, "r", readAndCallback, NULL);
}

/*
 * Wrapper - the function opens the file. If the file was opened
 * successfully, the fun function is called. Since the arguments can be very
 * different, they are passed through the void* pointer; a pointer to a
 * structure is a reasonable argument type.
 *
 * Returns 1 if the file cannot be opened, otherwise the value returned by
 * fun. If fun succeeded (returned 0) but closing the file fails, 1 is
 * returned, because buffered output may not have reached the disk.
 */
int withOpenFile(const char *filename,
                 const char *mode,
                 int (*fun)(FILE* source, void* args),
                 void* args) {
    FILE *file;
    int err;

    debug("try to open file");
    debug(filename);
    debug("\n");

    file = fopen(filename, mode);
    if (file == NULL) {
        return 1;
    }
    err = fun(file, args);

    debug("close file");
    debug(filename);
    debug("\n");
    if (fclose(file) != 0 && err == 0) {
        err = 1;
    }
    return err;
}

/* Runs the text-to-binary conversion and prints its result code. */
int main(void) {
    printf("result = %d", doStuff());
    getch();
    return 0;
}

The last example to analyze uses two related structures. The first is PersonKey, with id, login, password and offset fields. The second is PersonInfo, which contains the person's age, first name and last name. PersonKey records are written to keys.bin and PersonInfo records to values.bin. The offset field holds the position of the person's PersonInfo record in the second file, so a key taken from the first file can be used to fetch the matching information from the second. It looks like this:

#define _CRT_SECURE_NO_WARNINGS

#include <conio.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Key record stored in the keys file; one per person. */
typedef struct PersonKey {
    long long id;
    char login[64];
    char password[64];
    long offset; /* Position of corresponding PersonInfo values */
} PersonKey;

/* Value record stored in the values file; located through PersonKey.offset. */
typedef struct PersonInfo {
    unsigned age;
    char     firstName[64];
    char     lastName[128];
} PersonInfo;

/*
The function queries the user for data and writes it consecutively to two files
*/
void createOnePerson(FILE *keys, FILE *values) {
static long long id = 0;
PersonKey key;
PersonInfo pininfo;

pkey.id = id++;
//Since all values ​​are written one after another, the current position
of the //pointer in the second file will be the position for the new entry
pkey.offset = ftell(values);

printf("Login: ");
scanf("%63s", pkey.login);
printf("Password: ");
scanf("%63s", pkey.password);
printf("age: ");
scanf("%d", &(pinfo.age));
printf("First Name: ");
scanf("%63s", pinfo.firstName);
printf("Last Name: ");
scanf("%127s", pinfo.lastName);

fwrite(&pkey, sizeof(pkey), 1, keys);
fwrite(&pinfo, sizeof(pinfo), 1, values);
}

/*
Interactively creates one or more persons. The keys file starts with an int
holding the number of records; it is written first as a placeholder and
rewritten with the real value once the input is finished.
*/
void createPersons(FILE *keys, FILE *values) {
    char buffer[2];
    int repeat = 1;
    int counter = 0;//Number of elements in the file
    //Reserve space for recording the number of elements
    fwrite(&counter, sizeof(counter), 1, keys);
    printf("CREATE PERSONS\n");
    do {
        createOnePerson(keys, values);
        counter++;
        printf("\nYet another one? [y/n]");
        //A failed read (e.g. end of input) counts as "no"; otherwise buffer
        //would be inspected while uninitialised
        repeat = scanf("%1s", buffer) == 1
                 && (buffer[0] == 'y' || buffer[0] == 'Y');
    } while (repeat);
    //Go back to the beginning and write the number of created elements
    rewind(keys);
    fwrite(&counter, sizeof(counter), 1, keys);
}

/*
Create an array of keys.

Reads the record count from the start of `keys`, then the records themselves.
On return *size holds the number of keys actually read. Returns a malloc'ed
array that the caller must free, or NULL (with *size set to 0) if the count
is missing or not positive, memory cannot be allocated, or no record could
be read.
*/
PersonKey* readKeys(FILE *keys, int *size) {
    PersonKey *out = NULL;
    int declared = 0;
    size_t got;

    *size = 0;
    rewind(keys);
    if (fread(&declared, sizeof declared, 1, keys) != 1 || declared <= 0) {
        return NULL;
    }
    out = malloc((size_t) declared * sizeof *out);
    if (out == NULL) {
        return NULL;
    }
    /* The file may hold fewer records than its header claims; report only
       the ones that were really read. */
    got = fread(out, sizeof *out, (size_t) declared, keys);
    if (got == 0) {
        free(out);
        return NULL;
    }
    *size = (int) got;
    return out;
}

/*
The function opens two files at once. To simplify the task, we return an array of files.

Both files are opened in "w+b" mode (created or truncated, read/write, binary).
Returns a malloc'ed array of two streams: [0] is the keys file, [1] the values
file. The caller closes both streams and frees the array.
Returns NULL if memory cannot be allocated or either file cannot be opened;
in that case nothing is left open or allocated.
*/
FILE** openFiles(const char *keysFilename, const char *valuesFilename) {
    FILE **files = malloc(sizeof *files * 2);
    if (files == NULL) {
        return NULL;
    }
    files[0] = fopen(keysFilename, "w+b");
    if (files[0] == NULL) {
        free(files); /* do not leak the array on failure */
        return NULL;
    }
    files[1] = fopen(valuesFilename, "w+b");
    if (files[1] == NULL) {
        fclose(files[0]);
        free(files);
        return NULL;
    }
    return files;
}

/*
Two helper functions for deriving key and information
*/
/* Prints a key as "<id>. <login> [<password>]". */
void printKey(PersonKey pk) {
    /* id is a long long: print it with "%lld" instead of truncating to int. */
    printf("%lld. %s [%s]\n", pk.id, pk.login, pk.password);
}

/* Prints a person's info as "<age> <firstName> <lastName>". */
void printInfo(PersonInfo info) {
    /* age is unsigned, so the matching conversion is "%u". */
    printf("%u %s %s\n", info.age, info.firstName, info.lastName);
}

/*
Function by key (or rather, by its offset field)
gets the desired value from the second file.

Returns the PersonInfo record stored at pk.offset in `values`. If the file
cannot be positioned or the record cannot be read completely, a zero-filled
record is returned instead of indeterminate data.
*/
PersonInfo readInfoByPersonKey(PersonKey pk, FILE *values) {
    PersonInfo out;

    /* fseek with SEEK_SET positions absolutely, so no rewind is needed. It
       also provides the positioning call required between a write and a
       read on an update stream. */
    if (fseek(values, pk.offset, SEEK_SET) != 0
        || fread(&out, sizeof out, 1, values) != 1) {
        memset(&out, 0, sizeof out);
    }
    return out;
}

/*
Repeatedly asks the user for an index into `keys` and prints the matching
PersonInfo read from `values`. Returns as soon as the index is outside
[0, size) or the input is not a number.
*/
void getPersonsInfo(PersonKey *keys, FILE *values, int size) {
    int index;
    PersonInfo p;

    for (;;) {
        printf("Enter position of element. To exit print bad index: ");
        /* A failed conversion leaves the offending input in the stream;
           without this check the loop would spin forever on it. */
        if (scanf("%d", &index) != 1 || index < 0 || index >= size) {
            printf("bad index");
            return;
        }
        p = readInfoByPersonKey(keys[index], values);
        printInfo(p);
    }
}

/*
 * Creates the keys/values files, fills them interactively, prints every key
 * and then lets the user look up PersonInfo records by index.
 *
 * Returns EXIT_FAILURE if the files cannot be opened, 0 otherwise.
 */
int main(void) {
    int size = 0;
    int i;
    int status = EXIT_FAILURE;
    PersonKey *keys = NULL;
    FILE **files = openFiles("C:/c/keys.bin", "C:/c/values.bin");
    if (files == NULL) {
        printf("Error opening files");
        goto FREE;
    }
    createPersons(files[0], files[1]);
    keys = readKeys(files[0], &size);

    /* readKeys may fail to allocate the array; do not index a NULL pointer. */
    if (keys != NULL) {
        for (i = 0; i < size; i++) {
            printKey(keys[i]);
        }
        getPersonsInfo(keys, files[1], size);
    }

    fclose(files[0]);
    fclose(files[1]);
    status = 0;
FREE:
    /* free(NULL) is a no-op, so this path is safe after a failed open. */
    free(files);
    free(keys);
    _getch();
    return status;
}

Such a structure is very useful when PersonInfo is large. The small keys can be loaded into memory and searched quickly, while the bulky data is read from the second file only for the record that is actually needed.