evio  6.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Util.h
Go to the documentation of this file.
1 //
2 // Copyright 2020, Jefferson Science Associates, LLC.
3 // Subject to the terms in the LICENSE file found in the top-level directory.
4 //
5 // EPSCI Group
6 // Thomas Jefferson National Accelerator Facility
7 // 12000, Jefferson Ave, Newport News, VA 23606
8 // (757)-269-7100
9 
10 
11 #ifndef EVIO_6_0_UTIL_H
12 #define EVIO_6_0_UTIL_H
13 
#include <cctype>
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <regex>
#include <stdexcept>
#include <string>
#include <vector>
22 
23 #include "EvioException.h"
24 #include "ByteOrder.h"
25 #include "ByteBuffer.h"
26 #include "EvioNode.h"
27 #include "IBlockHeader.h"
28 #include "DataType.h"
29 
30 
31 namespace evio {
32 
33 
41  class Util {
42 
43  public:
44 
45  // Some methods to help deal with padding data to 4-byte boundaries
46 
/**
 * Returns the number of 32-bit words needed to hold the given number of bytes.
 * A length that is not a multiple of 4 is rounded up to the next whole word.
 *
 * @param length number of bytes.
 * @return number of 4-byte words able to contain all the bytes.
 */
static uint32_t getWords(uint32_t length) {
    const uint32_t wholeWords = length / 4;
    const bool partialWord = (length % 4) != 0;
    return partialWord ? wholeWords + 1 : wholeWords;
}
57 
58 
/**
 * Returns the number of bytes that must be added to the given length
 * in order to reach the next 4-byte boundary.
 *
 * @param length number of bytes.
 * @return number of padding bytes (0, 1, 2 or 3).
 */
static uint32_t getPadding(uint32_t length) {
    const uint32_t remainder = length % 4;
    return (4 - remainder) % 4;
}
69 
70 
71  //-----------------------------------------------------------------------
72 
73 
/**
 * Case-insensitive comparison of two strings.
 * Characters are compared after conversion with std::tolower, so the result
 * follows the current C locale.
 *
 * @param a first string.
 * @param b second string.
 * @return true if both strings have the same length and match character by
 *         character when case is ignored, else false.
 */
static bool iStrEquals(const std::string& a, const std::string& b) {
    const size_t sz = a.size();
    if (b.size() != sz) {
        return false;
    }

    for (size_t i = 0; i < sz; ++i) {
        // std::tolower is only defined for values representable as unsigned char
        // (or EOF), so a plain char with the high bit set must be converted first.
        const int lowerA = std::tolower(static_cast<unsigned char>(a[i]));
        const int lowerB = std::tolower(static_cast<unsigned char>(b[i]));
        if (lowerA != lowerB) {
            return false;
        }
    }
    return true;
}
89 
90 
96  static const DataType & getDataType(const std::string & type) {
97 
98  if (iStrEquals(type,"int8")) return DataType::CHAR8;
99  else if (iStrEquals(type,"uint8")) return DataType::UCHAR8;
100  else if (iStrEquals(type,"int16")) return DataType::SHORT16;
101  else if (iStrEquals(type,"uint16")) return DataType::USHORT16;
102  else if (iStrEquals(type,"int32")) return DataType::INT32;
103  else if (iStrEquals(type,"uint32")) return DataType::UINT32;
104  else if (iStrEquals(type,"int64")) return DataType::LONG64;
105  else if (iStrEquals(type,"uint64")) return DataType::ULONG64;
106  else if (iStrEquals(type,"long64")) return DataType::LONG64;
107  else if (iStrEquals(type,"ulong64")) return DataType::ULONG64;
108  else if (iStrEquals(type,"float32")) return DataType::FLOAT32;
109  else if (iStrEquals(type,"float64")) return DataType::DOUBLE64;
110  else if (iStrEquals(type,"double64")) return DataType::DOUBLE64;
111  else if (iStrEquals(type,"string")) return DataType::CHARSTAR8;
112  else if (iStrEquals(type,"composite")) return DataType::COMPOSITE;
113  else if (iStrEquals(type,"unknown32")) return DataType::UNKNOWN32;
114  else if (iStrEquals(type,"tagsegment")) return DataType::TAGSEGMENT;
115  else if (iStrEquals(type,"segment")) return DataType::ALSOSEGMENT;
116  else if (iStrEquals(type,"bank")) return DataType::ALSOBANK;
117 
119  }
120 
121 
133  static void toIntArray(char const *data, uint32_t dataLen,
134  const ByteOrder & byteOrder, uint32_t *dest) {
135 
136  if (data == nullptr || dest == nullptr) {
137  throw EvioException("bad arg");
138  }
139 
140  for (int i = 0; i < dataLen-3; i+=4) {
141  dest[i/4] = toInt(data[i ], data[i+1], data[i+2], data[i+3], byteOrder);
142  }
143  }
144 
145 
156  static uint32_t toInt(char b1, char b2, char b3, char b4, const ByteOrder & byteOrder) {
157 
158  if (byteOrder == ByteOrder::ENDIAN_BIG) {
159  return (
160  (0xff & b1) << 24 |
161  (0xff & b2) << 16 |
162  (0xff & b3) << 8 |
163  (0xff & b4)
164  );
165  }
166  else {
167  return (
168  (0xff & b1) |
169  (0xff & b2) << 8 |
170  (0xff & b3) << 16 |
171  (0xff & b4) << 24
172  );
173  }
174  }
175 
176 
185  static uint32_t toInt(uint8_t const * data, ByteOrder const & byteOrder) {
186  if (data == nullptr) {
187  throw EvioException("null arg");
188  }
189 
190  if (byteOrder == ByteOrder::ENDIAN_BIG) {
191  return (
192  (0xff & data[0]) << 24 |
193  (0xff & data[1]) << 16 |
194  (0xff & data[2]) << 8 |
195  (0xff & data[3])
196  );
197  }
198  else {
199  return (
200  (0xff & data[0]) |
201  (0xff & data[1]) << 8 |
202  (0xff & data[2]) << 16 |
203  (0xff & data[3]) << 24
204  );
205  }
206  }
207 
208 
217  static uint64_t toLong(uint8_t const * data, ByteOrder const & byteOrder) {
218  if (data == nullptr) {
219  throw EvioException("null arg");
220  }
221 
222  if (byteOrder == ByteOrder::ENDIAN_BIG) {
223  return (
224  (uint64_t)(0xff & data[0]) << 56 |
225  (uint64_t)(0xff & data[1]) << 48 |
226  (uint64_t)(0xff & data[2]) << 40 |
227  (uint64_t)(0xff & data[3]) << 32 |
228  (uint64_t)(0xff & data[4]) << 24 |
229  (uint64_t)(0xff & data[5]) << 16 |
230  (uint64_t)(0xff & data[6]) << 8 |
231  (uint64_t)(0xff & data[7])
232  );
233  }
234  else {
235  return (
236  (uint64_t)(0xff & data[0]) |
237  (uint64_t)(0xff & data[1]) << 8 |
238  (uint64_t)(0xff & data[2]) << 16 |
239  (uint64_t)(0xff & data[3]) << 24 |
240  (uint64_t)(0xff & data[4]) << 32 |
241  (uint64_t)(0xff & data[5]) << 40 |
242  (uint64_t)(0xff & data[6]) << 48 |
243  (uint64_t)(0xff & data[7]) << 56
244  );
245  }
246  }
247 
248 
257  static void toBytes(uint32_t data, const ByteOrder & byteOrder, uint8_t* dest) {
258 
259  if (dest == nullptr) {
260  throw EvioException("bad arg(s)");
261  }
262 
263  if (byteOrder == ByteOrder::ENDIAN_BIG) {
264  dest[0] = (uint8_t)(data >> 24);
265  dest[1] = (uint8_t)(data >> 16);
266  dest[2] = (uint8_t)(data >> 8);
267  dest[3] = (uint8_t)(data );
268  }
269  else {
270  dest[0] = (uint8_t)(data );
271  dest[1] = (uint8_t)(data >> 8);
272  dest[2] = (uint8_t)(data >> 16);
273  dest[3] = (uint8_t)(data >> 24);
274  }
275  }
276 
277 
286  static void toBytes(uint64_t data, const ByteOrder & byteOrder, uint8_t* dest) {
287 
288  if (dest == nullptr) {
289  throw EvioException("bad arg(s)");
290  }
291 
292  if (byteOrder == ByteOrder::ENDIAN_BIG) {
293  dest[0] = (uint8_t)(data >> 56);
294  dest[1] = (uint8_t)(data >> 48);
295  dest[2] = (uint8_t)(data >> 40);
296  dest[3] = (uint8_t)(data >> 32);
297  dest[4] = (uint8_t)(data >> 24);
298  dest[5] = (uint8_t)(data >> 16);
299  dest[6] = (uint8_t)(data >> 8);
300  dest[7] = (uint8_t)(data );
301  }
302  else {
303  dest[0] = (uint8_t)(data );
304  dest[1] = (uint8_t)(data >> 8);
305  dest[2] = (uint8_t)(data >> 16);
306  dest[3] = (uint8_t)(data >> 24);
307  dest[4] = (uint8_t)(data >> 32);
308  dest[5] = (uint8_t)(data >> 40);
309  dest[6] = (uint8_t)(data >> 48);
310  dest[7] = (uint8_t)(data >> 56);
311  }
312  }
313 
314 
323  static void toBytes(uint32_t data, const ByteOrder & byteOrder,
324  std::vector<uint8_t> & dest, size_t off) {
325 
326  if (byteOrder == ByteOrder::ENDIAN_BIG) {
327  dest[off ] = (uint8_t)(data >> 24);
328  dest[off+1] = (uint8_t)(data >> 16);
329  dest[off+2] = (uint8_t)(data >> 8);
330  dest[off+3] = (uint8_t)(data );
331  }
332  else {
333  dest[off ] = (uint8_t)(data );
334  dest[off+1] = (uint8_t)(data >> 8);
335  dest[off+2] = (uint8_t)(data >> 16);
336  dest[off+3] = (uint8_t)(data >> 24);
337  }
338  }
339 
340 
349  static void toBytes(uint16_t data, const ByteOrder & byteOrder, uint8_t* dest) {
350 
351  if (dest == nullptr) {
352  throw EvioException("bad arg(s)");
353  }
354 
355  if (byteOrder == ByteOrder::ENDIAN_BIG) {
356  dest[0] = (uint8_t)(data >> 8);
357  dest[1] = (uint8_t)(data );
358  }
359  else {
360  dest[0] = (uint8_t)(data );
361  dest[1] = (uint8_t)(data >> 8);
362  }
363  }
364 
365 
374  static void toBytes(uint16_t data, const ByteOrder & byteOrder,
375  std::vector<uint8_t> & dest, size_t off) {
376 
377  if (byteOrder == ByteOrder::ENDIAN_BIG) {
378  dest[off ] = (uint8_t)(data >> 8);
379  dest[off+1] = (uint8_t)(data );
380  }
381  else {
382  dest[off ] = (uint8_t)(data );
383  dest[off+1] = (uint8_t)(data >> 8);
384  }
385  }
386 
387 
/**
 * Get a string used to indicate that no name can be determined.
 * The string is a function-local static, so every call returns a reference
 * to the same object.
 *
 * @return reference to the string "???".
 */
static const std::string& NO_NAME_STRING() {
    static const std::string noName = "???";
    return noName;
}
397 
398 
412  static uint32_t findEvioVersion(ByteBuffer & bb, size_t initialPos) {
413  // Look at first record header
414 
415  // Have enough remaining bytes to read 8 words of header?
416  if (bb.limit() - initialPos < 32) {
417  throw std::underflow_error("not enough data to read in header");
418  }
419 
420  // Set the byte order to match the file's ordering.
421 
422  // Check the magic number for endianness (buffer defaults to big endian)
423  ByteOrder byteOrder = bb.order();
424 
425  // Offset to magic # is in the SAME LOCATION FOR ALL EVIO VERSIONS
426  uint32_t magicNumber = bb.getUInt(initialPos + IBlockHeader::MAGIC_OFFSET);
427  if (magicNumber != IBlockHeader::MAGIC_NUMBER) {
428  if (byteOrder == ByteOrder::ENDIAN_BIG) {
429  byteOrder = ByteOrder::ENDIAN_LITTLE;
430  }
431  else {
432  byteOrder = ByteOrder::ENDIAN_BIG;
433  }
434  bb.order(byteOrder);
435 
436  // Reread magic number to make sure things are OK
437  magicNumber = bb.getInt(initialPos + IBlockHeader::MAGIC_OFFSET);
438  if (magicNumber != IBlockHeader::MAGIC_NUMBER) {
439  throw EvioException("magic number is bad, " + std::to_string(magicNumber));
440  }
441  }
442 
443  // Find the version number, again, SAME LOCATION FOR ALL EVIO VERSIONS
444  uint32_t bitInfo = bb.getUInt(initialPos + IBlockHeader::BIT_INFO_OFFSET);
445  return bitInfo & IBlockHeader::VERSION_MASK;
446  }
447 
448 
458  static void printBytes(const std::shared_ptr<ByteBuffer> buf, uint32_t position, uint32_t bytes,
459  const std::string & label) {
460  printBytes(*(buf.get()), position, bytes, label);
461  }
462 
472  static void printBytes(const ByteBuffer & buf, uint32_t position, uint32_t bytes, const std::string & label) {
473 
474  // Make sure we stay in bounds
475  bytes = bytes + position > buf.capacity() ? (buf.capacity() - position) : bytes;
476 
477  if (!label.empty()) std::cout << label << ":" << std::endl;
478 
479  if (bytes < 1) {
480  std::cout << " no data in buf from position = " << position << std::endl;
481  return;
482  }
483 
484  for (int i=0; i < bytes; i++) {
485  if (i%20 == 0) {
486  std::cout << std::endl << std::dec << std::right << std::setfill(' ') <<
487  " Buf(" << std::setw(3) << (i + 1) <<
488  " - "<< std::setw(3) << (i + 20) << ") = ";
489  }
490  else if (i%4 == 0) {
491  std::cout << " ";
492  }
493  // Accessing buf in this way does not change position or limit of buffer
494  std::cout << std::hex << std::noshowbase << std::internal << std::setfill('0') <<
495  std::setw(2) << (int)(buf[i + position]) << " ";
496  }
497 
498  std::cout << std::dec << std::endl << std::endl << std::setfill(' ');
499  }
500 
501 
/**
 * Prints bytes of an array to stdout in hex, 20 bytes per row with an extra
 * gap after every 4 bytes. Each row is prefixed with the 1-based range of
 * byte numbers it covers.
 *
 * @param data  pointer to the bytes to print; if null, a message is printed
 *              and nothing else is done.
 * @param bytes number of bytes to print.
 * @param label label printed before the data; nothing is printed if empty.
 */
static void printBytes(uint8_t const * data, uint32_t bytes, const std::string & label) {

    // A null array cannot be read
    if (data == nullptr) {
        std::cout << "Util::printBytes: data arg is null" << std::endl;
        return;
    }

    if (!label.empty()) std::cout << label << ":" << std::endl;

    if (bytes < 1) {
        return;
    }

    for (uint32_t i = 0; i < bytes; i++) {
        if (i%20 == 0) {
            // Start a new row with the range of byte numbers it holds
            std::cout << std::endl << std::dec << std::right << std::setfill(' ') <<
                      " Buf(" << std::setw(3) << (i + 1) <<
                      " - "<< std::setw(3) << (i + 20) << ") = ";
        }
        else if (i%4 == 0) {
            std::cout << " ";
        }
        std::cout << std::hex << std::noshowbase << std::internal << std::setfill('0') <<
                  std::setw(2) << (int)(data[i]) << " ";
    }

    // Leave the stream in decimal mode with the default fill character
    std::cout << std::dec << std::endl << std::endl << std::setfill(' ');
}
534 
535 
545  static void printBytes(const std::string & fileName, uint64_t offset,
546  uint32_t bytes, const std::string & label) {
547 
548  if (fileName.empty()) {
549  std::cout << "Util::printBytes: fileName arg is invalid" << std::endl;
550  return;
551  }
552 
553  try {
554  std::ifstream inStreamRandom;
555 
556  // "ate" mode flag will go immediately to file's end (do this to get its size)
557  inStreamRandom.open(fileName, std::ios::in | std::ios::ate);
558  size_t fileSize = inStreamRandom.tellg();
559  // Go back to beginning of file
560  inStreamRandom.seekg(0);
561 
562  // read data
563  uint64_t limit = bytes + offset > fileSize ? fileSize : bytes + offset;
564  auto dataLen = (uint32_t)(limit - offset);
565  ByteBuffer buf(dataLen);
566  uint8_t * array = buf.array();
567  inStreamRandom.read(reinterpret_cast<char *>(array), dataLen);
568 
569  printBytes(buf, 0, dataLen, label);
570  }
571  catch (std::exception & e) {
572  // e.what() does not give any useful information...
573  std::cout << "Util::printBytes: " << strerror(errno) << std::endl;
574  }
575  }
576 
577 
585  static void writeBytes(const std::string & fileName, ByteBuffer & buf) {
586 
587  if (fileName.empty()) {
588  std::cout << "Util::writeBytes: fileName arg is invalid" << std::endl;
589  throw EvioException("fileName arg is invalid");
590  }
591 
592  std::fstream file;
593  file.open(fileName, std::ios::binary | std::ios::out);
594  if (file.fail()) {
595  std::cout << "error opening file " << fileName << std::endl;
596  throw EvioException("error opening file " + fileName);
597  }
598 
599  // Write this into a file
600  file.write(reinterpret_cast<char *>(buf.array() + buf.arrayOffset() + buf.position()),
601  buf.remaining());
602 
603  if (file.fail()) {
604  std::cout << "error writing to file " << fileName << std::endl;
605  throw EvioException("error writing to file " + fileName);
606  }
607  file.close();
608  }
609 
610 
620  static void readBytes(const std::string & fileName, ByteBuffer & buf) {
621 
622  if (fileName.empty()) {
623  std::cout << "Util::writeBytes: fileName arg is invalid" << std::endl;
624  throw EvioException("fileName arg is invalid");
625  }
626 
627  std::fstream file;
628  file.open(fileName, std::ios::binary | std::ios::in);
629  if (file.fail()) {
630  std::cout << "error opening file " << fileName << std::endl;
631  throw EvioException("error opening file " + fileName);
632  }
633 
634  // Write this into a file
635  file.read(reinterpret_cast<char *>(buf.array() + buf.arrayOffset() + buf.position()),
636  buf.remaining());
637 
638  if (file.fail()) {
639  std::cout << "error reading from file " << fileName << std::endl;
640  throw EvioException("error reading from file " + fileName);
641  }
642  file.close();
643  buf.position(buf.limit());
644  }
645 
646 
/**
 * Returns the power of two closest to the given value, rounding either up
 * or down. The bit manipulation assumes an int of 32 bits.
 *
 * @param x       value of interest.
 * @param roundUp if true, return the smallest power of two that is >= x;
 *                if false, return the largest power of two that is <= x.
 * @return the requested power of two; 0 if x is 0; -1 if x is negative or if
 *         rounding up would need a value too large for an int (x > 2^30).
 */
static int powerOfTwo(int x, bool roundUp) {
    if (x < 0) return -1;

    // The following algorithm is found in
    // "Hacker's Delight" by Henry Warren Jr.

    if (roundUp) {
        // The next power of two above 2^30 does not fit into an int;
        // computing it anyway would overflow a signed value.
        if (x > (1 << 30)) return -1;

        // Work on an unsigned copy so that x == 0 (which wraps to all ones
        // here and back to 0 after the final increment) is well defined.
        unsigned int bits = static_cast<unsigned int>(x) - 1u;
        // Smear the highest set bit into every lower position ...
        bits |= (bits >> 1);
        bits |= (bits >> 2);
        bits |= (bits >> 4);
        bits |= (bits >> 8);
        bits |= (bits >> 16);
        // ... so that adding one gives the next power of two
        return static_cast<int>(bits + 1u);
    }

    // Clear the lowest set bit until nothing is left;
    // the last non-zero value is the highest bit on its own.
    int highestBit;
    do {
        highestBit = x;
        x &= (x - 1);
    } while (x != 0);
    return highestBit;
}
677 
678 
/**
 * Copies the characters of a string into a byte vector, one byte per character.
 * Any previous contents of the vector are discarded. No terminating null
 * is added.
 *
 * @param input string to copy.
 * @param array vector that receives the bytes.
 */
static void stringToASCII(const std::string & input, std::vector<uint8_t> & array) {
    array.clear();
    array.reserve(input.size());

    for (const char ch : input) {
        array.push_back(static_cast<uint8_t>(ch));
    }
}
693 
694 
701  static void stringToASCII(const std::string & input, ByteBuffer & buf) {
702  size_t inputSize = input.size();
703  buf.clear();
704  buf.expand(inputSize);
705 
706  for (int i=0; i < inputSize; i++) {
707  buf.put(i, input[i]);
708  }
709  }
710 
711 
713  //
714  // Methods for parsing strings in evio format.
715  // These are placed here to break the circular dependency between
716  // BaseStructure and CompositeData.
717  //
719 
720 
/**
 * Returns the number of bytes the given strings occupy in raw evio format:
 * every string followed by a null, then 1 to 4 padding characters which
 * bring the total to a multiple of 4.
 *
 * @param strings strings to size.
 * @return number of bytes in the raw format; 0 if the vector is empty.
 */
static uint32_t stringsToRawSize(std::vector<std::string> const & strings) {

    if (strings.empty()) {
        return 0;
    }

    uint32_t rawLen = 0;
    for (size_t i = 0; i < strings.size(); i++) {
        // don't forget the null char after each string
        rawLen += strings[i].length() + 1;
    }

    // Add any necessary padding to 4 byte boundaries.
    // IMPORTANT: There must be at least one '\004'
    // character at the end. This distinguishes evio
    // string array version from earlier version.
    // Hence a length already on a boundary still gets 4 pad characters.
    const uint32_t padCount = 4 - (rawLen % 4);

    return rawLen + padCount;
}
749 
750 
/**
 * Returns the number of bytes a single string occupies in raw evio format:
 * the string followed by a null, then 1 to 4 padding characters which
 * bring the total to a multiple of 4.
 *
 * @param str string to size.
 * @return number of bytes in the raw format; 0 if the string is empty.
 */
static uint32_t stringToRawSize(const std::string & str) {

    if (str.empty()) {
        return 0;
    }

    // don't forget the null char after the string
    const uint32_t rawLen = str.length() + 1;

    // Add any necessary padding to 4 byte boundaries.
    // IMPORTANT: There must be at least one '\004'
    // character at the end. This distinguishes evio
    // string array version from earlier version.
    // Hence a length already on a boundary still gets 4 pad characters.
    const uint32_t padCount = 4 - (rawLen % 4);

    return rawLen + padCount;
}
775 
776 
/**
 * Converts strings into the raw evio byte format: every string followed by
 * a null, then 1 to 4 characters of value 4 as padding so that the total
 * length is a multiple of 4. Previous contents of the destination vector
 * are replaced.
 *
 * @param strings strings to convert.
 * @param bytes   vector that receives the raw bytes; emptied if strings is empty.
 */
static void stringsToRawBytes(std::vector<std::string> & strings,
                              std::vector<uint8_t> & bytes) {

    bytes.clear();

    if (strings.empty()) {
        return;
    }

    // Length of all strings, each with its ending null
    uint32_t rawLen = 0;
    for (std::string const & s : strings) {
        rawLen += s.length() + 1;
    }

    // Add any necessary padding to 4 byte boundaries.
    // IMPORTANT: There must be at least one '\004'
    // character at the end. This distinguishes evio
    // string array version from earlier version.
    const uint32_t padCount = 4 - (rawLen % 4);

    // create some storage
    bytes.reserve(rawLen + padCount);

    for (std::string const & s : strings) {
        // add string
        bytes.insert(bytes.end(), s.begin(), s.end());
        // add ending null
        bytes.push_back(static_cast<uint8_t>('\000'));
    }

    // add padding
    bytes.insert(bytes.end(), static_cast<size_t>(padCount), static_cast<uint8_t>('\004'));
}
829 
830 
838  static void unpackRawBytesToStrings(std::vector<uint8_t> & bytes, size_t offset,
839  std::vector<std::string> & strData) {
840  unpackRawBytesToStrings(bytes, offset, bytes.size(), strData);
841  }
842 
843 
854  static void unpackRawBytesToStrings(std::vector<uint8_t> & bytes,
855  size_t offset, size_t maxLength,
856  std::vector<std::string> & strData) {
857  int length = bytes.size() - offset;
858  if (bytes.empty() || (length < 4)) return;
859 
860  // Don't read read more than maxLength ASCII characters
861  length = length > maxLength ? maxLength : length;
862 
863  std::string sData(reinterpret_cast<const char *>(bytes.data()) + offset, length);
864  return stringBuilderToStrings(sData, true, strData);
865  }
866 
867 
877  static void unpackRawBytesToStrings(uint8_t *bytes, size_t length,
878  std::vector<std::string> & strData) {
879  if (bytes == nullptr) return;
880 
881  std::string sData(reinterpret_cast<const char *>(bytes), length);
882  // std::cout << "unpackRawBytesToStrings: string = " << sData << std::endl;
883  return stringBuilderToStrings(sData, true, strData);
884  }
885 
886 
895  static void unpackRawBytesToStrings(ByteBuffer & buffer,
896  size_t pos, size_t length,
897  std::vector<std::string> & strData) {
898 
899  if (length < 4) return;
900 
901  std::string sData(reinterpret_cast<const char *>(buffer.array() + buffer.arrayOffset()) + pos, length);
902  return stringBuilderToStrings(sData, false, strData);
903  }
904 
905 
/**
 * Splits raw evio string data into individual strings.
 *
 * Each string is terminated with a null (char val = 0) and, in addition,
 * the end is padded by ASCII 4's (char val = 4). However, in the legacy
 * versions of evio, there is only one null-terminated string and anything
 * as padding. To accommodate legacy evio, if there is not an ending ASCII
 * value 4, anything past the first null is ignored.
 *
 * The data counts as properly formatted if it holds only printable
 * characters, tabs, newlines and nulls up to the padding, at least one null,
 * and no more than four 4's of padding, all at the very end. Otherwise the
 * whole input is returned as a single string.
 *
 * @param strData       raw evio string data.
 * @param onlyGoodChars if true and the format is bad, return only the leading
 *                      characters found to be valid; if false, return the
 *                      whole input including any garbage.
 * @param strings       vector that receives the resulting strings; its
 *                      previous contents are discarded. Left empty if strData
 *                      is empty.
 */
static void stringBuilderToStrings(std::string const & strData, bool onlyGoodChars,
                                   std::vector<std::string> & strings) {

    const size_t length = strData.length();

    // Nothing to parse; also keeps the look at the last character below in bounds
    if (length == 0) {
        strings.clear();
        return;
    }

    std::vector<size_t> nullIndexList;
    nullIndexList.reserve(10);
    size_t goodChars = 0;
    bool badFormat = true;

    const bool noEnding4 = (strData[length - 1] != '\004');

    for (size_t i = 0; i < length; i++) {
        const char c = strData[i];

        // If char is a null
        if (c == 0) {
            nullIndexList.push_back(i);
            // If evio v2 or 3, only 1 null terminated string exists
            // and padding is just junk or nonexistent.
            if (noEnding4) {
                badFormat = false;
                break;
            }
        }
        // Look for any non-printing/control characters (not including null)
        // and end the string there. Allow tab & newline.
        else if ((c < 32 || c > 126) && c != 9 && c != 10) {
            // Scanning always stops here. The format is good only if a null
            // (and therefore a string) was already seen and this character
            // starts a run of at most four 4's reaching exactly to the end.
            // Everything else is garbage.
            badFormat = true;

            if (!nullIndexList.empty() && c == '\004') {
                // How many more chars are there?
                const size_t charsLeft = length - (i + 1);

                // Should be no more than 3 additional 4's before the end
                if (charsLeft <= 3) {
                    // Check to see if remaining chars are all 4's. If not, bad.
                    bool onlyFours = true;
                    for (size_t j = i + 1; j < length; j++) {
                        if (strData[j] != '\004') {
                            onlyFours = false;
                            break;
                        }
                    }
                    badFormat = !onlyFours;
                }
            }

            // Leave the loop for good. Resuming the scan after bad padding
            // was found could reach a later 4 and wrongly declare the data valid.
            break;
        }

        // Number of good ASCII chars we have
        goodChars++;
    }

    strings.clear();

    if (badFormat) {
        if (onlyGoodChars) {
            // Return everything in one String WITHOUT garbage
            strings.push_back(strData.substr(0, goodChars));
        }
        else {
            // Return everything in one String including possible garbage
            strings.push_back(strData);
        }
        return;
    }

    // If here, raw bytes are in the proper format.
    // Split at the nulls, keeping empty strings.
    size_t firstIndex = 0;
    for (const size_t nullIndex : nullIndexList) {
        strings.push_back(strData.substr(firstIndex, nullIndex - firstIndex));
        firstIndex = nullIndex + 1;
    }
}
1029 
1030 
1033 
1034 
/**
 * Substitutes environmental variables in a string. Every occurrence of the
 * form $(NAME) is replaced by the value that getenv returns for NAME, or by
 * nothing if no such variable is set. The text is searched again from the
 * start after each substitution, so a substituted value which itself
 * contains $(...) gets expanded too.
 *
 * @param text string to modify in place.
 */
static void expandEnvironmentalVariables(std::string & text) {
    static std::regex env("\\$\\(([^)]+)\\)");
    std::smatch found;

    while (std::regex_search(text, found, env)) {
        // Group 1 is the variable's name without the surrounding $( )
        const std::string name = found[1].str();
        const char * value = getenv(name.c_str());
        const std::string replacement(value != nullptr ? value : "");

        // Swap the whole $(NAME) expression for the value
        text.replace(found.position(0), found.length(0), replacement);
    }
}
1050 
1051 
/**
 * Counts the C-style integer format specifiers (a '%', an optional decimal
 * width, then 'x' or 'd') in a string and fixes them: any width that does
 * not begin with 0 gets a 0 placed in front of it (%3x --> %03x), or else
 * there will be empty spaces in the resulting string (i.e. file name).
 *
 * @param text string to examine and modify in place.
 * @return number of integer specifiers found.
 */
static uint32_t countAndFixIntSpecifiers(std::string & text) {
    static const std::regex specifier("%(\\d*)([xd])");

    // Build the fixed text in a single left-to-right pass. Changing text in
    // place invalidates the match iterators, which previously forced a new
    // search from the beginning of the string for every specifier.
    std::string fixed;
    fixed.reserve(text.size() + 8);

    uint32_t specifierCount = 0;
    std::string::const_iterator cursor = text.cbegin();
    std::smatch match;

    while (std::regex_search(cursor, text.cend(), match, specifier)) {
        specifierCount++;

        // Copy everything up to and including the '%'
        fixed.append(cursor, match[1].first);

        // Group 1 is the width; make sure a non-empty one starts with 0
        if (match[1].length() > 0 && *(match[1].first) != '0') {
            fixed.push_back('0');
        }

        // Copy width and conversion character, then continue after this specifier
        fixed.append(match[1].first, match[0].second);
        cursor = match[0].second;
    }

    // Whatever follows the last specifier
    fixed.append(cursor, text.cend());
    text.swap(fixed);

    return specifierCount;
}
1090 
1091 
1119  static int generateBaseFileName(const std::string & baseName, const std::string & runType,
1120  std::string & modifiedBaseName) {
1121 
1122  int* returnInts = new int[1];
1123 
1124  // Return the modified base file name
1125  modifiedBaseName = baseName;
1126 
1127  if (modifiedBaseName.length() < 1) {
1128  throw EvioException("empty string arg");
1129  }
1130 
1131  // Replace all %s occurrences with runType
1132  std::string::size_type pos;
1133  while ((pos = modifiedBaseName.find("%s")) != std::string::npos) {
1134  modifiedBaseName = (runType.length() < 1) ? modifiedBaseName.replace(pos, 2, "") :
1135  modifiedBaseName.replace(pos, 2, runType);
1136  }
1137 
1138  // Scan for environmental variables of the form $(xxx)
1139  // and substitute the values for them (blank string if not found)
1140  expandEnvironmentalVariables(modifiedBaseName);
1141 
1142  // Count # of int specifiers, making sure any number preceding
1143  // "x" or "d" starts with a 0 or else there will be empty spaces
1144  // in the file name (%3x --> %03x).
1145  uint32_t specifierCount = countAndFixIntSpecifiers(modifiedBaseName);
1146 
1147  if (specifierCount > 3) {
1148  throw EvioException("baseName arg is improperly formatted");
1149  }
1150 
1151  // Return # of C-style int format specifiers
1152  return specifierCount;
1153  }
1154 
1155 
1192  static std::string generateFileName(std::string fileName, uint32_t specifierCount,
1193  uint32_t runNumber, uint64_t split, uint32_t splitNumber,
1194  uint32_t streamId, uint32_t streamCount) {
1195 
1196  if (streamCount < 1) streamCount = 1;
1197  if (splitNumber < 1) splitNumber = 0;
1198  if (runNumber < 0) runNumber = 0;
1199  if (streamId < 0) streamId = 0;
1200  bool oneStream = streamCount < 2;
1201 
1202  if (fileName.length() < 1) {
1203  fileName = "file";
1204  }
1205 
1206  //cout << "generateFileName: split# = " << splitNumber << ", start with " << fileName <<
1207  //", streamId = " << streamId << ", stream count = " << streamCount << ", one stream = " <<
1208  //oneStream << endl;
1209  // NOTE: no run #s are tacked on the end!
1210 
1211  // If we're splitting files which is always the case of CODA users ...
1212  if (split > 0L) {
1213  // For no specifiers: tack stream id and split # onto end of file name
1214  if (specifierCount < 1) {
1215  if (oneStream) {
1216  fileName += "." + std::to_string(splitNumber);
1217  }
1218  else {
1219  fileName += "." + std::to_string(streamId) +
1220  "." + std::to_string(splitNumber);
1221  }
1222  }
1223  // For 1 specifier: insert run # at specified location,
1224  // then tack stream id and split # onto end of file name
1225  else if (specifierCount == 1) {
1226  char tempChar[fileName.length() + 1024];
1227  int err = std::sprintf(tempChar, fileName.c_str(), runNumber);
1228  if (err < 0) throw EvioException("badly formatted file name");
1229  std::string temp(tempChar);
1230  fileName = temp;
1231 
1232  if (oneStream) {
1233  fileName += "." + std::to_string(splitNumber);
1234  }
1235  else {
1236  fileName += "." + std::to_string(streamId) +
1237  "." + std::to_string(splitNumber);
1238  }
1239  }
1240  // For 2 specifiers: insert run # and split # at specified locations
1241  // and place stream id immediately before split #.
1242  else if (specifierCount == 2) {
1243  if (!oneStream) {
1244  // In order to place streamId before split#, place a %d in the filename
1245  // immediately before 2nd specifier.
1246  static std::regex specifier("(%\\d*[xd])");
1247  auto it = std::sregex_iterator(fileName.begin(), fileName.end(), specifier);
1248 
1249  // Go to 2nd match
1250  it++;
1251  std::smatch match = *it;
1252  fileName.replace(match[0].first, match[0].second, "%d." + match.str());
1253 
1254  char tempChar[fileName.length() + 1024];
1255  int err = std::sprintf(tempChar, fileName.c_str(), runNumber, streamId, splitNumber);
1256  if (err < 0) throw EvioException("badly formatted file name");
1257  std::string temp(tempChar);
1258  fileName = temp;
1259  }
1260  else {
1261  char tempChar[fileName.length() + 1024];
1262  int err = std::sprintf(tempChar, fileName.c_str(), runNumber, splitNumber);
1263  if (err < 0) throw EvioException("badly formatted file name");
1264  std::string temp(tempChar);
1265  fileName = temp;
1266  }
1267  }
1268  // For 3 specifiers: insert run #, stream id, and split # at specified locations
1269  else if (specifierCount == 3) {
1270  char tempChar[fileName.length() + 1024];
1271  int err = std::sprintf(tempChar, fileName.c_str(), runNumber, streamId, splitNumber);
1272  if (err < 0) throw EvioException("badly formatted file name");
1273  std::string temp(tempChar);
1274  fileName = temp;
1275  }
1276 
1277  }
1278  // If we're not splitting files, then CODA isn't being used and stream id is
1279  // probably meaningless.
1280  else {
1281  // For no specifiers: tack stream id onto end of file name
1282  if (specifierCount < 1) {
1283  if (!oneStream) {
1284  fileName += "." + std::to_string(streamId);
1285  }
1286  }
1287  else if (specifierCount == 1) {
1288  // Insert runNumber
1289  char tempChar[fileName.length() + 1024];
1290  int err = std::sprintf(tempChar, fileName.c_str(), runNumber);
1291  if (err < 0) throw EvioException("badly formatted file name");
1292  std::string temp(tempChar);
1293  fileName = temp;
1294 
1295  if (!oneStream) {
1296  fileName += "." + std::to_string(streamId);
1297  }
1298  }
1299  else if (specifierCount == 2) {
1300  // First get rid of the extra (2nd) int format specifier as no split # exists
1301  static std::regex specifier("(%\\d*[xd])");
1302  auto it = std::sregex_iterator(fileName.begin(), fileName.end(), specifier);
1303  // Go to 2nd match
1304  it++;
1305  std::smatch match = *it;
1306  fileName.replace(match[0].first, match[0].second, "");
1307 
1308  // Insert runNumber into first specifier
1309  char tempChar[fileName.length() + 1024];
1310  int err = std::sprintf(tempChar, fileName.c_str(), runNumber);
1311  if (err < 0) throw EvioException("badly formatted file name");
1312  std::string temp(tempChar);
1313  fileName = temp;
1314 
1315  if (!oneStream) {
1316  fileName += "." + std::to_string(streamId);
1317  }
1318  }
1319  else if (specifierCount == 3) {
1320  // Get rid of extra (3rd) int format specifier as no split # exists
1321  static std::regex specifier("(%\\d*[xd])");
1322  auto it = std::sregex_iterator(fileName.begin(), fileName.end(), specifier);
1323  // Go to 3rd match
1324  it++; it++;
1325  std::smatch match = *it;
1326  fileName.replace(match[0].first, match[0].second, "");
1327 
1328  // Insert runNumber into first specifier, stream id into 2nd
1329  char tempChar[fileName.length() + 1024];
1330  int err = std::sprintf(tempChar, fileName.c_str(), runNumber, streamId);
1331  if (err < 0) throw EvioException("badly formatted file name");
1332  std::string temp(tempChar);
1333  fileName = temp;
1334  }
1335  }
1336  //cout << "generateFileName: end with " << fileName << endl;
1337 
1338  return fileName;
1339  }
1340 
1341  };
1342 
1343 }
1344 
1345 
1346 #endif //EVIO_6_0_UTIL_H
static void unpackRawBytesToStrings(std::vector< uint8_t > &bytes, size_t offset, size_t maxLength, std::vector< std::string > &strData)
This method extracts an array of strings from byte array of raw evio string data. ...
Definition: Util.h:854
static uint32_t countAndFixIntSpecifiers(std::string &text)
Count the number of integer specifiers (e.g. %3d, %x) in the given string and put a 0 in front of any width that lacks one. ...
Definition: Util.h:1061
This class is copied from one of the same name in the Java programming language.
Definition: ByteBuffer.h:42
void expand(size_t newSize)
This method expands the size of this buffer if it's less than the given size.
Definition: ByteBuffer.cpp:386
static const DataType ALSOBANK
Bank alternate value.
Definition: DataType.h:50
static const DataType LONG64
64 bit int.
Definition: DataType.h:45
static const DataType UCHAR8
Unsigned 8 bit int.
Definition: DataType.h:43
static const DataType INT32
32 bit int.
Definition: DataType.h:47
static void printBytes(const std::string &fileName, uint64_t offset, uint32_t bytes, const std::string &label)
This method takes a file and prints out the desired number of bytes from the given offset...
Definition: Util.h:545
const ByteOrder & order() const
Get the byte order of the data.
Definition: ByteBuffer.cpp:466
static const ByteOrder ENDIAN_LITTLE
Little endian byte order.
Definition: ByteOrder.h:57
static uint32_t stringsToRawSize(std::vector< std::string > const &strings)
This method returns the number of bytes in a raw evio format of the given string array, not including header.
Definition: Util.h:729
static void toBytes(uint32_t data, const ByteOrder &byteOrder, uint8_t *dest)
Write int into byte array.
Definition: Util.h:257
static const std::string & NO_NAME_STRING()
Get a string used to indicate that no name can be determined.
Definition: Util.h:392
static std::string generateFileName(std::string fileName, uint32_t specifierCount, uint32_t runNumber, uint64_t split, uint32_t splitNumber, uint32_t streamId, uint32_t streamCount)
This method does NOT work on its own.
Definition: Util.h:1192
static void stringToASCII(const std::string &input, std::vector< uint8_t > &array)
Return an input string as ASCII in which each character is one byte.
Definition: Util.h:684
ByteBuffer & put(const ByteBuffer &src)
Relative bulk put method.
Definition: ByteBuffer.cpp:1325
size_t remaining() const
Returns the number of bytes from the current position to the end of the data.
Definition: ByteBuffer.cpp:497
Class containing static methods of general purpose.
Definition: Util.h:41
static void toBytes(uint32_t data, const ByteOrder &byteOrder, std::vector< uint8_t > &dest, size_t off)
Write int into byte vector.
Definition: Util.h:323
Numerical values associated with endian byte order.
Definition: ByteOrder.h:53
static void unpackRawBytesToStrings(uint8_t *bytes, size_t length, std::vector< std::string > &strData)
This method extracts an array of strings from a byte array of raw evio string data. ...
Definition: Util.h:877
uint32_t getUInt() const
Relative get method for reading an unsigned int value.
Definition: ByteBuffer.cpp:1115
static const DataType CHARSTAR8
ASCII characters.
Definition: DataType.h:39
static int generateBaseFileName(const std::string &baseName, const std::string &runType, std::string &modifiedBaseName)
This method generates part of a file name given a base file name as an argument.
Definition: Util.h:1119
static bool iStrEquals(const std::string &a, const std::string &b)
Case insensitive compare for 2 strings.
Definition: Util.h:80
Exception class for Evio software package.
Definition: EvioException.h:29
static const DataType DOUBLE64
64 bit double.
Definition: DataType.h:44
static const DataType COMPOSITE
Composite data type.
Definition: DataType.h:51
size_t capacity() const
Returns the total available bytes in this buffer.
Definition: ByteBuffer.cpp:504
static const DataType FLOAT32
32 bit float.
Definition: DataType.h:38
static void unpackRawBytesToStrings(ByteBuffer &buffer, size_t pos, size_t length, std::vector< std::string > &strData)
This method extracts an array of strings from a buffer containing raw evio string data.
Definition: Util.h:895
static const uint32_t VERSION_MASK
Mask to get version number from bitinfo word in header.
Definition: IBlockHeader.h:51
size_t arrayOffset() const
Get the offset within this buffer's backing array of the first element of the buffer.
Definition: ByteBuffer.cpp:490
static const DataType NOT_A_VALID_TYPE
Not a valid data type.
Definition: DataType.h:63
static void toBytes(uint64_t data, const ByteOrder &byteOrder, uint8_t *dest)
Turn long into byte array.
Definition: Util.h:286
static void printBytes(const ByteBuffer &buf, uint32_t position, uint32_t bytes, const std::string &label)
This method takes a byte buffer and prints out the desired number of bytes from the given position...
Definition: Util.h:472
static const DataType SHORT16
16 bit int.
Definition: DataType.h:40
static void readBytes(const std::string &fileName, ByteBuffer &buf)
This method reads part of a file into a ByteBuffer.
Definition: Util.h:620
static const DataType & getDataType(const std::string &type)
This method returns the evio data type corresponding to the given type name (compared case-insensitively, e.g. "int8" or "uint16").
Definition: Util.h:96
static const DataType UINT32
Unsigned 32 bit int.
Definition: DataType.h:37
static uint32_t findEvioVersion(ByteBuffer &bb, size_t initialPos)
Reads a couple things in a block/record header in order to determine the evio version and endianness ...
Definition: Util.h:412
static uint32_t getPadding(uint32_t length)
Returns number of bytes needed to pad to 4-byte boundary for the given length.
Definition: Util.h:64
static uint32_t getWords(uint32_t length)
Returns the number of 4-byte words needed to hold the given length in bytes, rounding up to the next 4-byte boundary.
Definition: Util.h:52
static const DataType CHAR8
8 bit int.
Definition: DataType.h:42
static uint32_t toInt(uint8_t const *data, ByteOrder const &byteOrder)
Turn 4 bytes into an unsigned 32 bit int.
Definition: Util.h:185
int32_t getInt() const
Relative get method for reading an int value.
Definition: ByteBuffer.cpp:1074
static void writeBytes(const std::string &fileName, ByteBuffer &buf)
This method takes a ByteBuffer and writes its data to a file.
Definition: Util.h:585
static void stringBuilderToStrings(std::string const &strData, bool onlyGoodChars, std::vector< std::string > &strings)
This method extracts an array of strings from a string containing evio string data.
Definition: Util.h:920
static int powerOfTwo(int x, bool roundUp)
Return the power of 2 closest to the given argument.
Definition: Util.h:654
size_t position() const
Returns the position of the buffer.
Definition: ByteBuffer.cpp:518
static void printBytes(const std::shared_ptr< ByteBuffer > buf, uint32_t position, uint32_t bytes, const std::string &label)
This method takes a byte buffer and prints out the desired number of bytes from the given position...
Definition: Util.h:458
static const DataType ALSOSEGMENT
Segment alternate value.
Definition: DataType.h:49
Numerical values associated with evio data types.
Definition: DataType.h:32
static void toIntArray(char const *data, uint32_t dataLen, const ByteOrder &byteOrder, uint32_t *dest)
Turn byte array into an int array.
Definition: Util.h:133
static const uint32_t BIT_INFO_OFFSET
Byte offset from beginning of header to bit info word.
Definition: IBlockHeader.h:48
static uint32_t toInt(char b1, char b2, char b3, char b4, const ByteOrder &byteOrder)
Turn 4 bytes into an unsigned 32 bit int.
Definition: Util.h:156
static const DataType UNKNOWN32
Unknown data type.
Definition: DataType.h:36
static const uint32_t MAGIC_OFFSET
Byte offset from beginning of header to the magic number.
Definition: IBlockHeader.h:45
static uint64_t toLong(uint8_t const *data, ByteOrder const &byteOrder)
Turn 8 bytes into an unsigned 64 bit int.
Definition: Util.h:217
static void stringToASCII(const std::string &input, ByteBuffer &buf)
Return an input string as ASCII in which each character is one byte.
Definition: Util.h:701
uint8_t * array() const
Get a pointer to this buffer's backing array which contains the data.
Definition: ByteBuffer.cpp:475
static void toBytes(uint16_t data, const ByteOrder &byteOrder, std::vector< uint8_t > &dest, size_t off)
Write short into byte vector.
Definition: Util.h:374
static const DataType USHORT16
Unsigned 16 bit int.
Definition: DataType.h:41
static void unpackRawBytesToStrings(std::vector< uint8_t > &bytes, size_t offset, std::vector< std::string > &strData)
This method extracts an array of strings from a byte array of raw evio string data. ...
Definition: Util.h:838
static void expandEnvironmentalVariables(std::string &text)
Substitute environmental variables in a given string when they come in the form $(ENV_VAR).
Definition: Util.h:1040
static const DataType TAGSEGMENT
Tag segment.
Definition: DataType.h:48
static uint32_t stringToRawSize(const std::string &str)
This method returns the number of bytes in a raw evio format of the given string array (with a single...
Definition: Util.h:758
static void printBytes(uint8_t const *data, uint32_t bytes, const std::string &label)
This method takes a pointer and prints out the desired number of bytes from the given position...
Definition: Util.h:510
ByteBuffer & clear()
Clears this buffer.
Definition: ByteBuffer.cpp:536
static const DataType ULONG64
Unsigned 64 bit int.
Definition: DataType.h:46
static void stringsToRawBytes(std::vector< std::string > &strings, std::vector< uint8_t > &bytes)
This method transforms an array/vector of strings into raw evio format data, not including header...
Definition: Util.h:784
size_t limit() const
Returns the limit, the position of the last valid data byte.
Definition: ByteBuffer.cpp:511
static const ByteOrder ENDIAN_BIG
Big endian byte order.
Definition: ByteOrder.h:58
static void toBytes(uint16_t data, const ByteOrder &byteOrder, uint8_t *dest)
Write short into byte array.
Definition: Util.h:349
static const uint32_t MAGIC_NUMBER
The magic number, should be the value of magicNumber.
Definition: IBlockHeader.h:42