Hi there! Do you know what a wav file is?
More specifically, do you know the wav file format?
In this article I use C/C++ to explain the audio file format in full detail
Sound storage in computer files
Sound files give computers a way to store and manipulate audio with signal processing, without having to deal with complex tasks such as ADC/DAC (analog-to-digital and digital-to-analog conversion)
Digital Encoding
There are different types of digital encoding: some use compressed data and others use uncompressed data, but the most popular encoding is PCM (Pulse Code Modulation)
Sample precision
Sample precision is the number of bits (or bytes) used to encode each sample of the audio data
Usually, a PCM sound file uses one (8-bit), two (16-bit), three (24-bit), or four (32-bit) bytes to represent each sample
Single-byte (8-bit) audio is not used in professional audio recording; it is stored as an unsigned char with values ranging from 0 to 255
Two bytes (16 bit) audio is a standard CD/DAT precision and it is a short type ranges from -32,768 to 32,767 so the SNR for CD format will be 96 dB
16-bit precision is good for most of the audio application and also it is a famous norm
Three-byte (24-bit) and four-byte (32-bit) audio samples are stored in a 32-bit integer type (such as long or int32_t), because C has no 3-byte (24-bit) data type
RIFF(Resource Interchange File Format) wave sound files
A wave file is a RIFF file: RIFF is a chunked file format that is native to little-endian processors
A chunk is a portion of the file's data, and each chunk carries a different kind of information
Every chunk has its own mini header containing its ID and its size
The structure of a wav file format
RIFF chunk
Chunk ID: the four ASCII characters "RIFF"
Chunk data size: the size of the rest of the file, i.e. the total file size minus the 8 bytes taken by this chunk's ID and size fields
RIFF TYPE ID: the four ASCII characters "WAVE"
Format subchunk
chunk ID: contain letters fmt
Remember that "fmt " has four characters, including the trailing space
chunk data size: the size of the format chunk
Format Tag: a code identifying the encoding, where 1 stands for PCM and other codes stand for different encodings
Channels :No of interleaved audio channels available
Sampling rate :Samples per second
Byte rate: bytes of audio data per second
Byte rate = Channels × samples per second × bits per sample / 8
Block Align: the number of bytes in one sample frame (one sample for each channel)
Block Align = Channels × bits per sample / 8
Bits per sample: number of bits in a sample
data subchunk
chunk ID: contain letters data
chunk data size: the size of the audio data in bytes
Byte ordering: the problem of data portability between different machines
The byte ordering confusion came into existence because different machines use different byte ordering conventions
For instance, Intel processors use little-endian byte ordering and Motorola processors use big-endian byte ordering
In summary, there are two types of endianness
- Little-endian: store the least significant byte (LSB) first
- Big-endian: store the most significant byte (MSB) first
Soundfile formats are either little-endian or big-endian
Check your processor endianness
#include <stdio.h>
#include <stdlib.h>
/* Reports whether this machine stores the least significant byte of an
   integer at the lowest address (little endian) or the most significant
   byte (big endian). */
int main()
{
    /* The value 1 has only its least significant byte set. */
    const unsigned int probe = 1;
    /* Look at the byte stored at the lowest address of the integer. */
    const char firstByte = *(const char*)&probe;

    if (firstByte == 1)
        printf("you have little endian byte ordering machine\n");
    else if (firstByte == 0)
        printf(" you have big endian byte ordering machine\n");

    return 0;
}
Output

check the size of .wav file
#include <iostream>
using namespace std;
// Asks for a file name and prints the total size of that file in bytes.
// Returns 0 on success, 1 if the file cannot be opened or measured.
int main()
{
    string input;
    cout << "Enter the wave file name: ";
    cin >> input;
    cin.get();
    const char* filePath = input.c_str();

    // A wav file is binary data: open it in binary mode so that no newline
    // translation takes place on platforms that distinguish text and binary.
    FILE* wavFile = fopen(filePath, "rb");
    if (wavFile == NULL)
    {
        fprintf(stderr, "file cannot be open %s \n", filePath);
        return 1; // do not fall through and use a NULL FILE*
    }

    // Seek to the end of the file; the resulting position is the file size.
    long length = -1;
    if (fseek(wavFile, 0, SEEK_END) == 0)
        length = ftell(wavFile);
    fclose(wavFile);

    if (length < 0)
    {
        fprintf(stderr, "file cannot be open %s \n", filePath);
        return 1;
    }
    printf("Total size of the wav file = %ld bytes\n", length);
    return 0;
}
Output

Check the Header size of the .Wav file
#include <iostream>
using namespace std;
// Asks for a file name, reads the wav header from the start of that file and
// prints how many header bytes were read.
// Returns 0 on success, 1 if the file cannot be opened.
int main()
{
    // In-memory image of the header: RIFF chunk, "fmt " sub-chunk and the
    // ID/size fields of the "data" sub-chunk (4+4+4 + 4+4+16 + 4+4 = 44 bytes).
    typedef struct WAV_HEADER
    {
        // RIFF Chunk
        uint8_t Chunk_ID[4];       // "RIFF"
        uint32_t Chunk_data_Size;  // RIFF chunk data size
        uint8_t RIFF_TYPE_ID[4];   // "WAVE"
        // format sub-chunk
        uint8_t Chunk1_ID[4];      // "fmt "
        uint32_t chunk1_data_Size; // Size of the format chunk
        uint16_t Format_Tag;       // Format tag, 1 = PCM
        uint16_t Num_Channels;     // 1 = Mono, 2 = Stereo
        uint32_t Sample_rate;      // Sampling frequency in Hz (e.g. 44100)
        uint32_t byte_rate;        // Bytes per second
        uint16_t block_Align;      // Bytes per sample frame (e.g. 4)
        uint16_t bits_Per_Sample;  // Bits per sample (e.g. 16)
        /* "data" sub-chunk */
        uint8_t chunk2_ID[4];      // "data"
        uint32_t chunk2_data_Size; // Size of the audio data
    } obj;
    // The struct is read straight from disk, so it must not contain padding.
    static_assert(sizeof(obj) == 44, "WAV_HEADER must match the 44-byte on-disk layout");
    obj header;

    string input;
    cout << "Enter the wave file name: ";
    cin >> input;
    cin.get();
    const char* filePath = input.c_str();

    // Binary mode: the header is raw bytes, not text.
    FILE* fp = fopen(filePath, "rb");
    if (fp == NULL)
    {
        fprintf(stderr, " file cannot be open %s \n", filePath);
        return 1; // do not fall through and read from a NULL FILE*
    }

    // Element size 1 makes fread return the number of bytes actually read.
    size_t num = fread(&header, 1, sizeof(header), fp);
    fclose(fp);
    cout << "Header size " << num << " bytes." << endl;
    return 0;
}
Output

Reading RIFF Chunk of .wav FILE
#include <iostream>
using namespace std;
// Asks for a file name, reads the wav header and prints the fields of the
// RIFF chunk.
// Returns 0 on success, 1 if the file cannot be opened or is too short.
int main()
{
    // In-memory image of the header: RIFF chunk, "fmt " sub-chunk and the
    // ID/size fields of the "data" sub-chunk (44 bytes in total).
    typedef struct WAV_HEADER
    {
        // RIFF Chunk
        uint8_t Chunk_ID[4];       // "RIFF"
        uint32_t Chunk_data_Size;  // RIFF chunk data size
        uint8_t RIFF_TYPE_ID[4];   // "WAVE"
        // format sub-chunk
        uint8_t Chunk1_ID[4];      // "fmt "
        uint32_t chunk1_data_Size; // Size of the format chunk
        uint16_t Format_Tag;       // Format tag, 1 = PCM
        uint16_t Num_Channels;     // 1 = Mono, 2 = Stereo
        uint32_t Sample_rate;      // Sampling frequency in Hz (e.g. 44100)
        uint32_t byte_rate;        // Bytes per second
        uint16_t block_Align;      // Bytes per sample frame (e.g. 4)
        uint16_t bits_Per_Sample;  // Bits per sample (e.g. 16)
        /* "data" sub-chunk */
        uint8_t chunk2_ID[4];      // "data"
        uint32_t chunk2_data_Size; // Size of the audio data
    } obj;
    // The struct is read straight from disk, so it must not contain padding.
    static_assert(sizeof(obj) == 44, "WAV_HEADER must match the 44-byte on-disk layout");
    obj header;

    string input;
    cout << "Enter the wave file name: ";
    cin >> input;
    cin.get();
    const char* filePath = input.c_str();

    // Binary mode: the header is raw bytes, not text.
    FILE* fp = fopen(filePath, "rb");
    if (fp == NULL)
    {
        fprintf(stderr, " file cannot be open %s \n", filePath);
        return 1; // do not fall through and read from a NULL FILE*
    }

    // Read the whole header; a short read would leave fields uninitialised.
    size_t num = fread(&header, 1, sizeof(header), fp);
    fclose(fp);
    if (num != sizeof(header))
    {
        fprintf(stderr, " file is too short to hold a wav header %s \n", filePath);
        return 1;
    }

    // Read RIFF Chunk
    cout << "Chunk ID :" << header.Chunk_ID[0] << header.Chunk_ID[1] << header.Chunk_ID[2] << header.Chunk_ID[3] << endl;
    cout << "Chunk data Size :" << header.Chunk_data_Size << endl;
    cout << "RIFF TYPE ID :" << header.RIFF_TYPE_ID[0] << header.RIFF_TYPE_ID[1] << header.RIFF_TYPE_ID[2] << header.RIFF_TYPE_ID[3] << endl;
    return 0;
}
Output

Reading RIFF Chunk and format sub Chunk of .wav FILE
#include <iostream>
using namespace std;
// Asks for a file name, reads the wav header and prints the fields of the
// RIFF chunk and of the format sub-chunk.
// Returns 0 on success, 1 if the file cannot be opened or is too short.
int main()
{
    // In-memory image of the header: RIFF chunk, "fmt " sub-chunk and the
    // ID/size fields of the "data" sub-chunk (44 bytes in total).
    typedef struct WAV_HEADER
    {
        // RIFF Chunk
        uint8_t Chunk_ID[4];       // "RIFF"
        uint32_t Chunk_data_Size;  // RIFF chunk data size
        uint8_t RIFF_TYPE_ID[4];   // "WAVE"
        // format sub-chunk
        uint8_t Chunk1_ID[4];      // "fmt "
        uint32_t Chunk1_data_Size; // Size of the format chunk
        uint16_t Format_Tag;       // Format tag, 1 = PCM
        uint16_t Num_Channels;     // 1 = Mono, 2 = Stereo
        uint32_t Sample_rate;      // Sampling frequency in Hz (e.g. 44100)
        uint32_t byte_rate;        // Bytes per second
        uint16_t block_Align;      // Bytes per sample frame (e.g. 4)
        uint16_t bits_per_sample;  // Bits per sample (e.g. 16)
        /* "data" sub-chunk */
        uint8_t chunk2_ID[4];      // "data"
        uint32_t chunk2_data_Size; // Size of the audio data
    } obj;
    // The struct is read straight from disk, so it must not contain padding.
    static_assert(sizeof(obj) == 44, "WAV_HEADER must match the 44-byte on-disk layout");
    obj header;

    string input;
    cout << "Enter the wave file name: ";
    cin >> input;
    cin.get();
    const char* filePath = input.c_str();

    // Binary mode: the header is raw bytes, not text.
    FILE* fp = fopen(filePath, "rb");
    if (fp == NULL)
    {
        fprintf(stderr, " file cannot be open %s \n", filePath);
        return 1; // do not fall through and read from a NULL FILE*
    }

    // Read the whole header; a short read would leave fields uninitialised.
    size_t num = fread(&header, 1, sizeof(header), fp);
    fclose(fp);
    if (num != sizeof(header))
    {
        fprintf(stderr, " file is too short to hold a wav header %s \n", filePath);
        return 1;
    }

    // Read RIFF Chunk
    cout << "Chunk ID :" << header.Chunk_ID[0] << header.Chunk_ID[1] << header.Chunk_ID[2] << header.Chunk_ID[3] << endl;
    cout << "Chunk data Size :" << header.Chunk_data_Size << endl;
    cout << "RIFF TYPE ID :" << header.RIFF_TYPE_ID[0] << header.RIFF_TYPE_ID[1] << header.RIFF_TYPE_ID[2] << header.RIFF_TYPE_ID[3] << endl;
    // format subchunk
    cout<<"-------------------------------------------------------------------------------"<<endl;
    cout << "Chunk1_ID :" << header.Chunk1_ID[0] << header.Chunk1_ID[1] << header.Chunk1_ID[2] << header.Chunk1_ID[3] << endl;
    cout << "Chunk1 data Size :" << header.Chunk1_data_Size << endl;
    cout << "Format Tag :" << header.Format_Tag << endl;
    cout << "Num_Channels :" << header.Num_Channels << endl;
    cout << "Sample_rate :" << header.Sample_rate << endl;
    cout << "byte_rate :" << header.byte_rate << endl;
    cout << "block_Align :" << header.block_Align << endl;
    cout << "bits per sample :" << header.bits_per_sample << endl;
    return 0;
}
Output

Reading RIFF Chunk, format sub Chunk and data sub Chunk of a .wav FILE
#include <iostream>
using namespace std;
// Asks for a file name, reads the wav header and prints the fields of the
// RIFF chunk, the format sub-chunk and the data sub-chunk header.
// Returns 0 on success, 1 if the file cannot be opened or is too short.
int main()
{
    // In-memory image of the header: RIFF chunk, "fmt " sub-chunk and the
    // ID/size fields of the "data" sub-chunk (44 bytes in total).
    typedef struct WAV_HEADER
    {
        // RIFF Chunk
        uint8_t Chunk_ID[4];       // "RIFF"
        uint32_t Chunk_data_Size;  // RIFF chunk data size
        uint8_t RIFF_TYPE_ID[4];   // "WAVE"
        // format sub-chunk
        uint8_t Chunk1_ID[4];      // "fmt "
        uint32_t Chunk1_data_Size; // Size of the format chunk
        uint16_t Format_Tag;       // Format tag, 1 = PCM
        uint16_t Num_Channels;     // 1 = Mono, 2 = Stereo
        uint32_t Sample_rate;      // Sampling frequency in Hz (e.g. 44100)
        uint32_t byte_rate;        // Bytes per second
        uint16_t block_Align;      // Bytes per sample frame (e.g. 4)
        uint16_t bits_per_sample;  // Bits per sample (e.g. 16)
        /* "data" sub-chunk */
        uint8_t Chunk2_ID[4];      // "data"
        uint32_t Chunk2_data_Size; // Size of the audio data
    } obj;
    // The struct is read straight from disk, so it must not contain padding.
    static_assert(sizeof(obj) == 44, "WAV_HEADER must match the 44-byte on-disk layout");
    obj header;

    string input;
    cout << "Enter the wave file name: ";
    cin >> input;
    cin.get();
    const char* filePath = input.c_str();

    // Binary mode: the header is raw bytes, not text.
    FILE* fp = fopen(filePath, "rb");
    if (fp == NULL)
    {
        fprintf(stderr, " file cannot be open %s \n", filePath);
        return 1; // do not fall through and read from a NULL FILE*
    }

    // Read the whole header; a short read would leave fields uninitialised.
    size_t num = fread(&header, 1, sizeof(header), fp);
    fclose(fp);
    if (num != sizeof(header))
    {
        fprintf(stderr, " file is too short to hold a wav header %s \n", filePath);
        return 1;
    }

    // Read RIFF Chunk
    cout << "Chunk ID :" << header.Chunk_ID[0] << header.Chunk_ID[1] << header.Chunk_ID[2] << header.Chunk_ID[3] << endl;
    cout << "Chunk data Size :" << header.Chunk_data_Size << endl;
    cout << "RIFF TYPE ID :" << header.RIFF_TYPE_ID[0] << header.RIFF_TYPE_ID[1] << header.RIFF_TYPE_ID[2] << header.RIFF_TYPE_ID[3] << endl;
    // format subchunk
    cout<<"-------------------------------------------------------------------------------"<<endl;
    cout << "Chunk1_ID :" << header.Chunk1_ID[0] << header.Chunk1_ID[1] << header.Chunk1_ID[2] << header.Chunk1_ID[3] << endl;
    cout << "Chunk1 data Size :" << header.Chunk1_data_Size << endl;
    cout << "Format Tag :" << header.Format_Tag << endl;
    cout << "Num_Channels :" << header.Num_Channels << endl;
    cout << "Sample_rate :" << header.Sample_rate << endl;
    cout << "byte_rate :" << header.byte_rate << endl;
    cout << "block_Align :" << header.block_Align << endl;
    cout << "bits per sample :" << header.bits_per_sample << endl;
    // data subchunk
    cout<<"-------------------------------------------------------------------------------"<<endl;
    cout << "Chunk2_ID :" << header.Chunk2_ID[0] << header.Chunk2_ID[1] << header.Chunk2_ID[2] << header.Chunk2_ID[3] << endl;
    cout << "Chunk2 data Size :" << header.Chunk2_data_Size << endl;
    return 0;
}
Output

Now let’s take four different .wav files and mix them to create a single .wav file
Reggae.wav
in the first place, take a file called Moonlight Beach by Kevin MacLeod
Link: https://incompetech.filmmusic.io/song/6671-moonlight-beach
License: http://creativecommons.org/licenses/by/4.0/
African.wav
in the second place, take a file called Rite of Passage by Kevin MacLeod
Link: https://incompetech.filmmusic.io/song/4291-rite-of-passage
License: http://creativecommons.org/licenses/by/4.0/
Piano.wav
in the third place, take a file called A Very Brady Special by Kevin MacLeod
Link: https://incompetech.filmmusic.io/song/5760-a-very-brady-special
License: http://creativecommons.org/licenses/by/4.0/
Jazz.wav
in the fourth place, take a file called Smooth Lovin by Kevin MacLeod
Link: https://incompetech.filmmusic.io/song/4379-smooth-lovin
License: http://creativecommons.org/licenses/by/4.0/
In short, we create a single .wav file from four different .wav files
That is, a mix of Reggae, African, Jazz, and Piano
The code below mixes the wav files by reading and writing .wav files
Note: Reggae.wav, African.wav, Piano.wav and Jazz.wav all have a sample rate of 44100 Hz, and all four audio files are mono (single channel)
A tool called Audacity was used to convert each stereo .wav file to a mono .wav file
main.cpp
#include <vector>
#include <string>
#include <cassert>
#include "wav.h"
using namespace std;
// Sample rate in Hz used when writing output files and when sizing the mix buffer.
const int SAMPLE_RATE = 44100;
// Directory the input wav files are read from.
// NOTE: absolute path on the author's machine - change it before running.
const string IN_DIR = "C:\\Users\\Mohammed Anees\\Desktop\\New folder (2)\\InputFiles\\";
// Directory the generated wav files are written to.
// NOTE: absolute path on the author's machine - change it before running.
const string OUT_DIR = "C:\\Users\\Mohammed Anees\\Desktop\\New folder (2)\\OutputFiles\\";
// Reads "Tone.wav" from IN_DIR, halves the amplitude of every sample and
// writes the result to "Tone Copy.wav" in OUT_DIR.
// The copy is written with the sample rate and channel count that were read
// from the source file, so it plays back at the same pitch and duration.
void readWriteAudio()
{
    printf("readWriteAudio\n");

    // Read the input file (samples are interleaved if it has several channels)
    vector<float> sourceBuf;
    int sr;
    int numCh;
    string sourcePath = IN_DIR + "Tone.wav";
    audioRead(sourcePath, sourceBuf, sr, numCh);

    // Set up an output buffer of the same length
    const size_t numValues = sourceBuf.size();
    vector<float> outBuf(numValues);

    // Scale every input sample into the output buffer
    const float amp_gain = 0.5;
    for (size_t i = 0; i < numValues; i++)
    {
        outBuf[i] = amp_gain * sourceBuf[i];
    }

    // Write the audio to file using the source's own format
    string outPath = OUT_DIR + "Tone Copy.wav";
    audioWrite(outPath, outBuf, sr, numCh);
}
// Mixes Reggae.wav, African.wav, Piano.wav and Jazz.wav (read from the
// "AcesHigh" folder under IN_DIR) by summing them sample by sample, and writes
// the first 40 seconds of the mix to "Mix.wav" in OUT_DIR.
// Every input must be mono at SAMPLE_RATE. A track that is shorter than the
// output simply stops contributing once it runs out of samples.
void mixAudioFiles()
{
    printf("mix all Audio Files\n");

    // Set up buffers for all the audio files
    vector<float> Reggae;
    vector<float> African;
    vector<float> Piano;
    vector<float> Jazz;

    // Read the audio files; the sample-by-sample sum below is only meaningful
    // for mono tracks that share the output sample rate.
    int sr;
    int numCh;
    audioRead(IN_DIR + "AcesHigh/Reggae.wav", Reggae, sr, numCh);
    assert(sr == SAMPLE_RATE && numCh == 1);
    audioRead(IN_DIR + "AcesHigh/African.wav", African, sr, numCh);
    assert(sr == SAMPLE_RATE && numCh == 1);
    audioRead(IN_DIR + "AcesHigh/Piano.wav", Piano, sr, numCh);
    assert(sr == SAMPLE_RATE && numCh == 1);
    audioRead(IN_DIR + "AcesHigh/Jazz.wav", Jazz, sr, numCh);
    assert(sr == SAMPLE_RATE && numCh == 1);

    // Set up an output buffer, i.e. set the output length of the wav file
    float durSecs = 40.0;
    int numFrames = SAMPLE_RATE*durSecs;
    vector<float> outBuf(numFrames);

    // Tracks in the order in which they are added to the sum
    const vector<float>* tracks[] = { &Reggae, &African, &Piano, &Jazz };

    // For each output frame
    for (int i = 0; i < numFrames; i++)
    {
        // Add the sample of every track that still has data at this frame;
        // reading past the end of a shorter track would be out of bounds.
        float sum = 0.0;
        for (const vector<float>* track : tracks)
        {
            if ((size_t)i < track->size())
                sum = sum + (*track)[i];
        }
        outBuf[i] = sum;
    }

    // Write the audio
    string outPath = OUT_DIR + "Mix.wav";
    audioWrite(outPath, outBuf, SAMPLE_RATE);
}
// Program entry point: runs the mixing demo.
int main()
{
    // Alternative demo, disabled: readWriteAudio();
    mixAudioFiles();

    return 0;
}
wav.cpp
#include <cstring>
#include <cassert>
#include <fstream>
#include"wav.h"
#include <climits>
using namespace std;
// Bit depth of every file written by audioWrite.
static const int BITS_PER_SAMPLE = 16;
// Lowest and highest sample rate (Hz) that audioRead accepts.
static const int MIN_SAMPLE_RATE = 8000;
static const int MAX_SAMPLE_RATE = 96000;
// Splits an interleaved buffer (ch0, ch1, ch0, ch1, ...) into one buffer per
// channel. `split` is resized to numCh buffers of interleaved.size()/numCh
// samples each; numCh must be 1 or 2.
static void deinterleaveAudio(const vector<float>& interleaved, vector<vector<float>>& split, int numCh)
{
    assert(numCh > 0 && numCh <= 2);

    const int framesPerChannel = (int)interleaved.size()/numCh;

    split.resize(numCh);
    for (auto& channel : split)
        channel.resize(framesPerChannel);

    // Frame f of channel c sits at index f*numCh + c of the interleaved data.
    for (int frame = 0; frame < framesPerChannel; frame++)
    {
        const int base = frame * numCh;
        for (int ch = 0; ch < numCh; ch++)
            split[ch][frame] = interleaved[base + ch];
    }
}
// Merges one buffer per channel into a single interleaved buffer
// (ch0, ch1, ch0, ch1, ...). `split` must hold 1 or 2 channels, and all
// channels must have the same length, because the length of channel 0 is
// used for every channel.
static void interleaveAudio(const vector<vector<float>>& split, vector<float>& interleaved)
{
    const int numCh = (int)split.size();
    assert(numCh > 0 && numCh <= 2);

    const size_t n = split[0].size();
    // A shorter channel would otherwise be read past its end below.
    for (const auto& channel : split)
    {
        assert(channel.size() == n);
        (void)channel; // silences the unused warning when asserts are disabled
    }

    interleaved.resize(n*numCh);
    float *outP = interleaved.data();
    for (size_t i = 0; i < n; i++)
    {
        for (int ch = 0; ch < numCh; ch++)
            *outP++ = split[ch][i];
    }
}
// Asserts that the processor is little-endian, i.e. that the byte stored at
// the lowest address of the integer 1 is the one holding the value 1.
void checkProcessorEndianness()
{
    const int probe = 1;
    const char lowestAddressByte = *(const char*)(&probe);
    const bool littleEndian = (lowestAddressByte == 1);
    assert(littleEndian);
    (void)littleEndian; // silences the unused warning when asserts are disabled
}
// Returns true if the two 4-character codes are equal.
// All four bytes are always compared: chunk IDs are fixed-size byte codes,
// not NUL-terminated strings, so the comparison must not stop at a 0 byte.
bool Equal (const char a[4], const char b [4])
{
    return (memcmp(a, b, 4) == 0);
}
// Reads a wav file header and leaves the stream positioned at the first byte
// of the audio data.
//
// Handles the canonical 44-byte header as well as files whose "fmt " chunk
// carries extension bytes, and files that store other chunks (e.g. "LIST")
// between the "fmt " chunk and the "data" chunk: those are skipped.
//
// fp              Stream positioned at the start of the file
// Sample_Rate     Receives the sampling rate in Hz (must be 8000..96000)
// numSamples      Receives the number of sample frames (samples per channel)
// Num_Channels    Receives the channel count (must be 1 or 2)
// Bits_Per_Sample Receives the bits per sample (non-zero multiple of 8)
//
// Invalid or unsupported headers trip an assert. Only PCM (format tag 1) is
// supported. If successful, returns the number of sample frames; returns 0 if
// no "data" chunk was found (reachable only when asserts are disabled).
int readWavHeader(
    istream &fp,
    int &Sample_Rate,
    int &numSamples,
    short &Num_Channels,
    short &Bits_Per_Sample )
{
    checkProcessorEndianness();
    // The fields below are read straight into int/short variables
    assert( sizeof(int) == 4 );
    assert( sizeof(short) == 2 );

    //----------------------- RIFF chunk ---------------------------------
    // Chunk ID must be the letters RIFF
    char chunk_ID[4];
    fp.read(chunk_ID, sizeof(chunk_ID));
    assert(fp.gcount() == sizeof(chunk_ID));
    assert( Equal(chunk_ID, "RIFF"));
    // Chunk data size of the RIFF chunk (read, but not needed afterwards)
    int chunk_data_Size;
    fp.read((char*)&chunk_data_Size, sizeof(chunk_data_Size));
    assert(fp.gcount() == sizeof(chunk_data_Size));
    // RIFF type ID must be the letters WAVE
    fp.read(chunk_ID, sizeof(chunk_ID));
    assert(fp.gcount() == sizeof(chunk_ID));
    assert(Equal(chunk_ID, "WAVE"));

    //----------------------- format subchunk---------------------------
    char chunk1_ID[4];
    fp.read(chunk1_ID, sizeof(chunk1_ID));
    assert(fp.gcount() == sizeof(chunk1_ID));
    assert( Equal(chunk1_ID, "fmt "));
    // Chunk data size of the format chunk: 16 for plain PCM, more when the
    // chunk carries extension bytes
    int chunk1_data_Size;
    fp.read((char*)&chunk1_data_Size, sizeof(chunk1_data_Size));
    assert(fp.gcount() == sizeof(chunk1_data_Size));
    assert(chunk1_data_Size >= 16);
    short Format_Tag;
    fp.read((char*)&Format_Tag, sizeof(Format_Tag));
    assert(fp.gcount() == sizeof(Format_Tag));
    assert(Format_Tag == 1);
    fp.read((char*)&Num_Channels, sizeof(Num_Channels));
    assert(fp.gcount() == sizeof(Num_Channels));
    assert(Num_Channels == 1 || Num_Channels == 2);
    fp.read((char*)&Sample_Rate, sizeof(Sample_Rate));
    assert(fp.gcount() == sizeof(Sample_Rate));
    assert( Sample_Rate >= 8000 && Sample_Rate <= 96000 );
    int Byte_Rate;
    fp.read((char*)&Byte_Rate, sizeof(Byte_Rate));
    assert(fp.gcount() == sizeof(Byte_Rate));
    short Block_Align;
    fp.read((char*)&Block_Align, sizeof(Block_Align));
    assert(fp.gcount() == sizeof(Block_Align));
    fp.read((char*)&Bits_Per_Sample, sizeof(Bits_Per_Sample));
    assert(fp.gcount() == sizeof(Bits_Per_Sample));
    // A zero bit depth would make Block_Align zero and divide by zero below
    assert( Bits_Per_Sample > 0 && Bits_Per_Sample % 8 == 0);
    assert( Byte_Rate == Sample_Rate * Num_Channels * Bits_Per_Sample/8 );
    assert( Block_Align == Num_Channels * Bits_Per_Sample/8 );
    // Skip whatever the format chunk holds beyond the 16 bytes read above.
    // RIFF chunks are padded to an even size, hence the extra byte when odd.
    if (chunk1_data_Size > 16)
        fp.ignore((streamsize)(chunk1_data_Size - 16) + (chunk1_data_Size & 1));

    // -- "data" subchunk --
    // Walk the following chunks until the one named "data" is found
    char chunk2_ID[4];
    uint32_t chunk2_data_Size = 0;
    bool foundData = false;
    while (!foundData
           && fp.read(chunk2_ID, sizeof(chunk2_ID))
           && fp.read((char*)&chunk2_data_Size, sizeof(chunk2_data_Size)))
    {
        if (Equal(chunk2_ID, "data"))
            foundData = true;
        else
            fp.ignore((streamsize)chunk2_data_Size + (chunk2_data_Size & 1));
    }
    assert(foundData);
    if (!foundData || Block_Align <= 0)
    {
        numSamples = 0;
        return 0;
    }

    // One sample frame (a sample for every channel) occupies Block_Align bytes
    numSamples = chunk2_data_Size / Block_Align;
    return numSamples;
}
// Writes the 44-byte header of a PCM wav file to `fp`: the RIFF chunk, the
// "fmt " sub-chunk and the ID/size of the "data" sub-chunk. The caller writes
// the audio data itself afterwards.
//
// sampleRate    Sampling rate in Hz (8000..96000)
// numSamples    Number of sample frames (samples per channel), >= 0
// numChannels   1 or 2
// bitsPerSample 8 or 16
//
// Parameters outside these ranges trip an assert. Always returns 0.
int writeWavHeader(
    ostream& fp,
    const int sampleRate,
    const int numSamples,
    const short numChannels,
    const short bitsPerSample )
{
    checkProcessorEndianness();
    assert( sampleRate >= 8000 && sampleRate <= 96000 );
    assert( numSamples >= 0 );
    assert( numChannels == 1 || numChannels == 2 );
    assert( bitsPerSample == 8 || bitsPerSample == 16 );
    // Header fields are written straight from int/short variables
    assert( sizeof(int) == 4 );
    assert( sizeof(short) == 2 );

    // Values derived from the parameters
    const int audioBytes = numSamples*numChannels*bitsPerSample/8;
    // 36 = header bytes that follow the RIFF size field (44 - 8)
    const int riffBytes = 36 + audioBytes;
    const int fmtBytes = 16;
    const short pcmTag = 1;
    // Channels X samples per second X bits per sample/8
    const int bytesPerSecond = sampleRate * numChannels * bitsPerSample/8;
    // Channels X bits per sample/8
    const short bytesPerFrame = numChannels * bitsPerSample/8;

    //----------------------- RIFF chunk --------------------------------
    fp.write("RIFF", 4);
    fp.write((const char*)&riffBytes, sizeof(riffBytes));
    fp.write("WAVE", 4);

    //----------------------- format subchunk ---------------------------
    fp.write("fmt ", 4);
    fp.write((const char*)&fmtBytes, sizeof(fmtBytes));
    fp.write((const char*)&pcmTag, sizeof(pcmTag));
    fp.write((const char*)&numChannels, sizeof(numChannels));
    fp.write((const char*)&sampleRate, sizeof(sampleRate));
    fp.write((const char*)&bytesPerSecond, sizeof(bytesPerSecond));
    fp.write((const char*)&bytesPerFrame, sizeof(bytesPerFrame));
    fp.write((const char*)&bitsPerSample, sizeof(bitsPerSample));

    //----------------------- "data" subchunk ---------------------------
    fp.write("data", 4);
    fp.write((const char*)&audioBytes, sizeof(audioBytes));

    return 0;
}
// Writes an interleaved buffer of normalized samples to a 16-bit PCM wav file.
//
// path  Path of the file to create
// x     Interleaved samples; values outside [-1, 1] are clipped
// sr    Sample rate in Hz
// numCh Number of channels (1 or 2)
//
// Asserts if the channel count is invalid, the file cannot be opened, the
// buffer holds no complete sample frame, or writing fails.
void audioWrite(const string& path, const vector<float> &x, int sr, int numCh)
{
    // Validate the channel count first: it is used as a divisor below
    assert(numCh == 1 || numCh == 2);

    // Open the output wav file
    ofstream outStream(path.c_str(), ios::out | ios::binary);
    assert(outStream.is_open());

    // Get the number of sample frames
    const int numSamples = (int)x.size()/numCh;
    assert(numSamples > 0);

    // Write the wav header
    writeWavHeader(outStream, sr, numSamples, numCh, BITS_PER_SAMPLE);

    // Convert the samples from normalized [-1.0, 1.0] floating-point to 16-bit shorts
    const int numValues = numSamples*numCh;
    vector<short> shortBuf(numValues);
    for (int i = 0; i < numValues; i++)
    {
        float flt = x[i];
        flt = min<float>(flt, 1.0);
        flt = max<float>(flt, -1.0);
        shortBuf[i] = SHRT_MAX * flt; // SHRT_MAX = +32767
    }

    // Write the audio data
    outStream.write((char*)shortBuf.data(), numValues*sizeof(shortBuf[0]));
    outStream.flush();
    // Surface write errors (e.g. disk full) instead of leaving a truncated file
    assert(outStream.good());

    // Close the file
    outStream.close();
}
// Write an audio file starting from split (non-interleaved) data
void audioWrite(const string& path, const vector<vector<float>> &x, int sr)
{
vector<float> interleaved;
interleaveAudio(x, interleaved);
audioWrite(path, interleaved, sr, (int)x.size());
}
// Reads a PCM wav file into an interleaved buffer of normalized samples.
//
// path  Path of the file to read
// x     Receives numCh * (number of sample frames) values, scaled by
//       1 / 2^(bits-1) so that they lie in [-1, 1)
// sr    Receives the sample rate in Hz
// numCh Receives the number of channels
//
// Supports 8, 16, 24 and 32 bits per sample. Asserts if the file cannot be
// opened or its header is not supported. If the file holds less audio data
// than its header announces, the missing samples are read as zero.
void audioRead(const string& path, vector<float> &x, int &sr, int &numCh)
{
    checkProcessorEndianness();

    // Open the input wav file
    ifstream inStream(path.c_str(), ios::in | ios::binary);
    if (!inStream.is_open())
    {
        printf("Couldn't open %s\n", path.c_str());
        assert(inStream.is_open());
    }

    // Read the wav header
    int numSamples;
    short bitsPerSample;
    short numChannels;
    readWavHeader(inStream, sr, numSamples, numChannels, bitsPerSample);
    numCh = numChannels;
    assert(sr >= MIN_SAMPLE_RATE && sr <= MAX_SAMPLE_RATE);
    assert(numCh >= 1);
    assert(numSamples >= 0);
    assert(bitsPerSample % 8 == 0);
    const int bytesPerSample = bitsPerSample / 8;
    // Each sample is assembled in a 4-byte integer below, so a wider sample
    // would overflow it and a zero-width one would make the shift invalid.
    assert(bytesPerSample >= 1 && bytesPerSample <= (int)sizeof(int32_t));

    // Adjust output vector to correct size to accommodate the samples
    const size_t numValues = (size_t)numCh * (size_t)numSamples;
    x.resize(numValues);

    // Read all the audio data. Since several sample widths are supported, it
    // is read as unsigned bytes. The buffer starts zero-filled.
    vector<uint8_t> audioData(numValues * bytesPerSample);
    inStream.read((char*)audioData.data(), (streamsize)audioData.size());

    // Convert to normalized [-1,1] floating point.
    // Absolute maximum sample value, e.g. for 16-bit audio it is 2^15 = 32768.
    // The shift is done on an unsigned value so that 32-bit audio (shift by
    // 31) is well defined.
    const float maxSample = (float)(1u << (bitsPerSample-1));
    // Scale factor that maps the integer range onto [-1, 1)
    const float scale = 1.0 / maxSample;

    const uint8_t *src = audioData.data();
    for (size_t i = 0; i < numValues; i++)
    {
        int32_t sample = 0;
        uint8_t* sampleBytes = (uint8_t*)(&sample); // We assume this is little-endian!

        // Read in the sample value byte-by-byte. The file stores samples in
        // little-endian format, so the least-significant byte is always first.
        for (int b = 0; b < bytesPerSample; b++)
        {
            sampleBytes[b] = *src++;
        }

        if (bytesPerSample == 1)
        {
            // By convention, 8-bit audio is *unsigned* with sample values from
            // 0 to 255. To make it signed, translate 0 to -128. A literal is
            // used because CHAR_MIN is 0 on platforms where char is unsigned.
            sample -= 128;
        }
        // Otherwise, if the most-significant bit of the most-significant byte
        // is 1 (0x80 is 10000000), the sample is negative, so the unused upper
        // bytes are set to all 1s (0xFF is 11111111) to sign-extend it.
        else if (sampleBytes[bytesPerSample-1] & 0x80)
        {
            for (size_t b = bytesPerSample; b < sizeof(sample); b++)
            {
                sampleBytes[b] = 0xFF;
            }
        }

        // Apply scale to get the normalized sample value
        x[i] = scale * sample;
    }

    // Close the file
    inStream.close();
}
// Read into split (not interleaved) buffers
void audioRead(const string& path, vector<vector<float>> &x, int &sr)
{
int numCh;
vector<float> interleaved;
audioRead(path, interleaved, sr, numCh);
deinterleaveAudio(interleaved, x, numCh);
}
wav.h
#ifndef __Wav__
#define __Wav__
#include <vector>
#include <string>
using namespace std;
/// Writes a 16-bit PCM wav file from an interleaved buffer.
/// @param path  Path to audio file
/// @param x     Interleaved audio data. Will be clipped if outside [-1, 1] range.
/// @param sr    Sample rate of audio data
/// @param numCh Number of channels (1 or 2)
void audioWrite(const string& path, const vector<float> &x, int sr, int numCh = 1);
/// Writes a 16-bit PCM wav file from non-interleaved audio.
/// @param path Path to audio file
/// @param x    Audio data, one buffer per channel. Will be clipped if outside [-1, 1] range.
/// @param sr   Sample rate of audio data
void audioWrite(const string& path, const vector<vector<float>> &x, int sr);
/// Reads an audio file into an interleaved buffer.
/// @param path  Path to audio file
/// @param x     Receives the audio data, interleaved
/// @param sr    Receives the sample rate of the audio data
/// @param numCh Receives the number of channels
void audioRead(const string& path, vector<float> &x, int &sr, int &numCh);
/// Reads an audio file into non-interleaved buffers.
/// @param path Path to audio file
/// @param x    Receives the audio data, one buffer per channel
/// @param sr   Receives the sample rate of the audio data
void audioRead(const string& path, vector<vector<float>> &x, int &sr);
#endif
Reference
- The Audio Programming Book by Richard Boulanger and Victor Lazzarini
Leave a Reply