Skip to main content

Table 2 Ultra-frequent n-mers in the human genome

From: Sequence space coverage, entropy of genomes and the potential to detect non-human DNA in human samples

5-mer

Freq

6-mer

Freq

7-mer

Freq

8-mer

Freq

AAAAA

38,658,471

AAAAAA

19,638,479

AAAAAAA

12,559,969

AAAAAAAA

9,155,123

ATTTT

23,349,997

AAAAAT

9,299,025

AAATAAA

3,521,836

ATATATAT

1,863,212

TATTT

19,344,297

TATTTT

8,283,181

AAAAAAT

3,335,157

TGTGTGTG

1,701,426

AGAAA

18,271,461

AAATAA

7,271,302

AAAGAAA

3,255,464

TTTTAAAA

1,574,448

AAATT

16,119,174

AGAAAA

7,027,241

TTTTAAA

3,218,950

AAAATAAA

1,562,386

TTATT

15,608,707

TTTATT

7,015,408

TTATTTT

3,132,222

AAAGAAAA

1,467,003

TTTTC

15,579,017

TTTTAA

6,910,624

ATATATA

3,034,879

AAAAGAAA

1,444,255

CAAAA

15,364,161

TTTCTT

6,890,187

TTAAAAA

2,961,520

AAAAATTA

1,392,585

TTCTT

15,093,160

TTTAAA

6,820,966

TTTTCTT

2,913,461

TAAAAATA

1,363,753

TCTTT

15,053,533

AAAATT

6,559,595

TTTTATT

2,814,631

CCAGCCTG

1,308,899

CTTTT

14,643,705

TTCTTT

6,476,513

TAAAAAT

2,811,575

CAGCCTGG

1,266,975

CATTT

13,897,384

ATAAAA

6,284,494

AAAAATT

2,773,210

CTTTTTTT

1,238,232

AAACA

13,686,083

TTTTTG

5,979,892

TTCTTTT

2,693,599

AAAAAGAA

1,228,463

TTTGT

13,634,981

ATTTTA

5,829,423

AGAAAAA

2,614,232

CAAAAAAA

1,223,343

TAAAT

13,334,177

AAATAT

5,784,598

AAAATTA

2,572,747

AAAACAAA

1,216,247

ATATA

13,333,472

AAAAGA

5,708,574

ACAAAAA

2,554,220

TTTTTCTT

1,175,794

TGAAA

13,099,712

TTTTGT

5,622,008

TTTGTTT

2,514,354

ATTTATTT

1,171,982

ATATT

13,067,844

TATATA

5,551,066

TGTGTGT

2,500,827

TTTTGTTT

1,155,387

AAAAC

12,161,846

TGTTTT

5,550,449

TTTTTTA

2,460,695

TTAAAAAA

1,145,762

AGAGA

12,078,839

CTTTTT

5,515,339

TTTTTTG

2,436,880

AAAAAATA

1,143,587

9-mer

Freq

10-mer

Freq

  

11-mer

Freq

AAAAAAAAA

7,276,886

AAAAAAAAAA

5,952,617

  

AAAAAAAAAAA

4,945,619

TGTGTGTGT

1,424,846

TGTGTGTGTG

1,168,929

  

TGTGTGTGTGT

1,067,659

ATATATATA

1,253,711

ATATATATAT

967,302

  

CTGTAATCCCA

804,201

CCAGGCTGG

1,043,029

TGTAATCCCA

857,278

  

ATATATATATA

785,762

CTGGGATTA

921,482

GCTGGGATTA

840,776

  

TGTAATCCCAG

782,505

CCTGTAATC

917,990

GGATTACAGG

835,847

  

CCTGTAATCCC

762,833

GGATTACAG

916,302

CTGTAATCCC

826,256

  

GTAATCCCAGC

744,961

GAGGCTGAG

911,055

GGAGGCTGAG

810,203

  

CCTCAGCCTCC

729,303

GCTGGGATT

899,822

CTGGGATTAC

802,982

  

GAGGCTGAGGC

602,716

TGTAATCCC

897,744

CCTCAGCCTC

795,109

  

GCCTGTAATCC

579,838

GGAGGCTGA

891,406

AGGCTGAGGC

644,447

  

AAAATACAAAA

564,984

GTAATCCCA

887,639

CCAGCCTGGG

644,416

  

CCTCCCAAAGT

561,106

AGGCTGAGG

881,036

GCCTGTAATC

629,425

  

GGGAGGCTGAG

556,004

TTGGGAGGC

847,581

AAAAAAAAAG

626,192

  

AGGCTGAGGCA

555,758

CTTTTTTTT

794,742

TTTGTATTTT

619,817

  

CACTTTGGGAG

554,429

TTTTATTTT

780,751

TTTGGGAGGC

619,461

  

CTTTGGGAGGC

553,151

CAAAAAAAA

758,731

AAATACAAAA

611,794

  

AGTAGCTGGGA

546,426

AAAAAAAAT

741,719

CAAAAAAAAA

602,719

  

AAAAATACAAA

542,436

AAATACAAA

740,229

ACTTTGGGAG

600,711

  

CAGGCTGGAGT

536,630

CCCAGGCTG

740,090

TCAGCCTCCC

600,206

  

TCCCAAAGTGC

534,617

12-mer

Freq

13-mer

Freq

  

14-mer

Freq

AAAAAAAAAAAA

4,144,156

AAAAAAAAAAAAA

3,468,084

  

AAAAAAAAAAAAAA

2,889,704

TGTGTGTGTGTG

928,266

TGTGTGTGTGTGT

867,556

  

GTGTGTGTGTGTGT

775,928

TGGGATTACAGG

744,980

CTGTAATCCCAGC

687,709

  

CCTGTAATCCCAGC

639,010

CTGGGATTACAG

737,944

CTGGGATTACAGG

684,576

  

ATATATATATATAT

503,574

GCTGGGATTACA

727,608

ATATATATATATA

571,291

  

CTGGGATTACAGGC

478,945

ATATATATATAT

664,692

GCCTGTAATCCCA

520,611

  

AGCACTTTGGGAGG

459,948

GGAGGCTGAGGC

562,149

GCCTCCCAAAGTG

493,408

  

GCACTTTGGGAGGC

448,768

GCCTGTAATCCC

532,230

GGAGGCTGAGGCA

489,998

  

AAGTGCTGGGATTA

448,388

GCCTCCCAAAGT

527,552

CTCCCAAAGTGCT

486,025

  

AAAGTGCTGGGATT

445,915

TGCCTCAGCCTC

523,666

GCACTTTGGGAGG

474,289

  

CTCCCAAAGTGCTG

443,681

CCTCCCAAAGTG

522,504

CAGCACTTTGGGA

471,479

  

GGAGGCTGAGGCAG

437,319

TCCCAAAGTGCT

517,353

AGTGCTGGGATTA

468,425

  

CAAAGTGCTGGGAT

436,625

AAAAATACAAAA

513,200

TGCACTCCAGCCT

466,972

  

CAGGCTGGAGTGCA

436,504

GGGAGGCTGAGG

507,919

GAGGCTGAGGCAG

465,893

  

CCAGCACTTTGGGA

434,713

GCACTTTGGGAG

501,469

AAGTGCTGGGATT

464,827

  

TGCTGGGATTACAG

428,229

CCAGGCTGGAGT

499,800

ATCCCAGCACTTT

460,277

  

CCAGGCTGGAGTGC

425,904

AGGCTGAGGCAG

492,943

GCACTCCAGCCTG

454,751

  

CCTGCCTCAGCCTC

423,801

AGGCTGGAGTGC

486,794

TGTAATCCCAGCA

453,696

  

TCCCAGCACTTTGG

423,684

AGTGCTGGGATT

486,236

CCAGGCTGGAGTG

453,312

  

TGTAATCCCAGCAC

417,426

CAGGCTGGAGTG

484,927

TCCCAGCACTTTG

448,301

  

AGTGCTGGGATTAC

416,557

15-mer

Freq

16-mer

Freq

  

17-mer

Freq

AAAAAAAAAAAAAAA

2,397,399

AAAAAAAAAAAAAAAA

1,981,757

  

AAAAAAAAAAAAAAAAA

1,634,441

TGTGTGTGTGTGTGT

729,915

TGTGTGTGTGTGTGTG

662,157

  

TGTGTGTGTGTGTGTGT

624,344

GCCTGTAATCCCAGC

448,640

TAATCCCAGCACTTTG

408,698

  

TAATCCCAGCACTTTGG

386,886

ATATATATATATATA

446,009

ATATATATATATATAT

402,936

  

ATCCCAGCACTTTGGGA

385,642

AGCACTTTGGGAGGC

435,271

CCAAAGTGCTGGGATT

400,385

  

AATCCCAGCACTTTGGG

381,085

TAATCCCAGCACTTT

430,431

GCCTCCCAAAGTGCTG

398,210

  

AAAGTGCTGGGATTACA

375,216

CAAAGTGCTGGGATT

423,190

TCCCAGCACTTTGGGA

395,536

  

CTCCCAAAGTGCTGGGA

373,151

CCTCCCAAAGTGCTG

420,378

CCCAAAGTGCTGGGAT

392,938

  

AAGTGCTGGGATTACAG

369,056

ATCCCAGCACTTTGG

412,897

TGTAATCCCAGCACTT

390,400

  

GCCTCCCAAAGTGCTGG

368,350

CCAGCACTTTGGGAG

409,630

CCTCCCAAAGTGCTGG

388,489

  

CAAAGTGCTGGGATTAC

364,621

CCAGGCTGGAGTGCA

409,063

CTGTAATCCCAGCACT

385,038

  

CCCAGCACTTTGGGAGG

363,569

TGTAATCCCAGCACT

407,346

AAAGTGCTGGGATTAC

383,514

  

ATATATATATATATATA

363,206

CCCAGCACTTTGGGA

406,238

CCCAGCACTTTGGGAG

383,068

  

AGTGCTGGGATTACAGG

358,629

TCCCAGCACTTTGGG

403,080

GTGCTGGGATTACAGG

367,247

  

TCTACTAAAAATACAAA

340,685

AAGTGCTGGGATTAC

399,124

TGCACTCCAGCCTGGG

361,996

  

CTACTAAAAATACAAAA

340,602

CCTGCCTCAGCCTCC

398,260

TACTAAAAATACAAAA

360,995

  

CTCCTGCCTCAGCCTCC

328,239

TGCTGGGATTACAGG

398,070

GAGGCTGAGGCAGGAG

349,118

  

TTGTATTTTTAGTAGAG

325,500

GTGCTGGGATTACAG

394,430

CTACTAAAAATACAAA

348,511

  

TTCTCCTGCCTCAGCCT

324,096

GCACTCCAGCCTGGG

376,640

TCCTGCCTCAGCCTCC

346,669

  

TCTCTACTAAAAATACA

319,930

TTTTGTATTTTTAGT

374,747

TTGTATTTTTAGTAGA

346,193

  

GGCTGAGGCAGGAGAAT

318,655

18-mer

Freq

19-mer

Freq

  

20-mer

Freq

AAAAAAAAAAAAAAAAAA

1,345,821

AAAAAAAAAAAAAAAAAAA

1,104,496

  

AAAAAAAAAAAAAAAAAAAA

901,140

GTGTGTGTGTGTGTGTGT

569,504

TGTGTGTGTGTGTGTGTGT

536,547

  

TGTGTGTGTGTGTGTGTGTG

489,597

TCCCAAAGTGCTGGGATT

374,035

TAATCCCAGCACTTTGGGA

361,679

  

CTCCCAAAGTGCTGGGATTA

341,542

TAATCCCAGCACTTTGGG

368,428

AATCCCAGCACTTTGGGAG

353,065

  

AATCCCAGCACTTTGGGAGG

335,334

ATCCCAGCACTTTGGGAG

363,933

ATCCCAGCACTTTGGGAGG

345,608

  

GCCTCCCAAAGTGCTGGGAT

328,159

TGTAATCCCAGCACTTTG

356,809

TGTAATCCCAGCACTTTGG

338,285

  

GTAATCCCAGCACTTTGGGA

323,542

CTGTAATCCCAGCACTTT

354,805

CAAAGTGCTGGGATTACAG

337,559

  

CCCAAAGTGCTGGGATTACA

322,532

TCCCAGCACTTTGGGAGG

354,247

GCCTCCCAAAGTGCTGGGA

336,291

  

CTGTAATCCCAGCACTTTGG

320,216

GTAATCCCAGCACTTTGG

345,648

CCTGTAATCCCAGCACTTT

330,885

  

CAAAGTGCTGGGATTACAGG

315,005

CCCAGCACTTTGGGAGGC

345,007

CCCAAAGTGCTGGGATTAC

329,491

  

TTTTGTATTTTTAGTAGAGA

300,256

CCTGTAATCCCAGCACTT

343,923

CTCTACTAAAAATACAAAA

313,148

  

ATTCTCCTGCCTCAGCCTCC

280,115

TTTTGTATTTTTAGTAGA

333,004

TCTCTACTAAAAATACAAA

307,152

  

ATATATATATATATATATAT

279,576

ATATATATATATATATAT

332,710

ATATATATATATATATATA

302,986

  

TTTTTGTATTTTTAGTAGAG

272,798

TTTGTATTTTTAGTAGAG

320,328

GAGGCTGAGGCAGGAGAAT

296,297

  

CCACTGCACTCCAGCCTGGG

239,735

TCTCTACTAAAAATACAA

312,108

GGAGGCTGAGGCAGGAGAA

291,488

  

GCCTGTAATCCCAGCACTTT

238,538

AGGCTGAGGCAGGAGAAT

311,358

TTTTTGTATTTTTAGTAGA

290,073

  

GGCCTCCCAAAGTGCTGGGA

224,951

GAGGCTGAGGCAGGAGAA

308,372

CCAGGCTGGAGTGCAGTGG

273,189

  

GTCTCTACTAAAAATACAAA

195,075

TCTCCTGCCTCAGCCTCC

298,682

CACTGCACTCCAGCCTGGG

256,220

  

TTCTCCTGCCTCAGCCTCCC

191,155

TTTTTGTATTTTTAGTAG

296,585

GCCTGTAATCCCAGCACTT

247,562

  

GCCACTGCACTCCAGCCTGG

183,392

CCAGGCTGGAGTGCAGTG

292,150

GGCCTCCCAAAGTGCTGGG

230,427

  

TTTTTTTTTTTTTTTTGAGA

167,522