前言 在一次做项目的时候,团队分工制作数据集,用labelImg来打标,汇总数据时发现各个xml中的图片路径各不相同,于是就写了这个工具来批量修改xml中的图片路径。
目录 原理 先打开一个xml文件,观察一下它的结构
<annotation>
    <folder>zhua_qu</folder>
    <filename>2021_03_16_16_42_11_296.jpg</filename>
    <path>D:\xyolo\images\train\2021_03_16_16_42_11_296.jpg</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>640</width>
        <height>480</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>zhua_qu</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>389</xmin>
            <ymin>225</ymin>
            <xmax>522</xmax>
            <ymax>359</ymax>
        </bndbox>
    </object>
</annotation>
可以发现图片文件名在<filename>和</filename>标签之间,图片路径在<path>和</path>标签之间,理论上来说我们只要将<path>和</path>之间的图片路径替换成我们想要的就可以了
这个路径怎么生成呢?把要替换成的目录路径加上文件名就可以了。在这里我们不用考虑C++烦人的中文乱码问题:就算要替换成中文路径,我们也不需要去读取这些路径,只是把它原样写进xml,直接替换其中的图片路径即可。
到这里,替换的问题已经有思路了,然后我们再考虑批处理的问题。这里我用了<io.h>这个库来对指定的目录进行遍历,如果遇到目录就用递归的方法继续遍历,对检测到的文件进行过滤,留下xml文件的路径,压入一个vector容器中,后面再逐个取出来,调用替换的函数或者类来处理。
代码 main.cpp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 #include <iostream> #include <vector> #include <string> #include <chrono> #include "replace.h" #include "getfiles.h" void help (const char * name) { std::string progname = name; size_t lastPos = progname.find_last_of ("/\\" ); progname = progname.substr (lastPos + 1 ); std::cout << std::endl; std::cout << "This tool replaces the file path of the image in the XML file in the dataset" << std::endl << std::endl; std::cout << "Usage: " << std::endl; std::cout << "\t.\\" << progname << " [DataSet Path] [Replace Path]" << std::endl << std::endl; std::cout << "For example: " << std::endl; std::cout << "\t.\\main.exe C:\\Users\\17740\\Desktop\\DataSet\\ D:\\xyolo\\images\\train\\" << std::endl << std::endl; } int main (int argc, char **argv) { if (argc == 1 || (argc == 2 &&(strcmp (argv[1 ], "--help" ) == 0 || strcmp (argv[1 ], "-h" ) == 0 ))) { help (argv[0 ]); return 0 ; } auto start = std::chrono::system_clock::now (); std::string filePath = argv[1 ]; std::string replace = argv[2 ]; std::vector<std::string> files; if (filePath.find_last_of ("\\" ) == filePath.length () - 1 ) { filePath.pop_back (); } getFiles (filePath, files); std::cout << "Replace..." ; int size = files.size (); for (int i = 0 ; i < size; i++) { Replace r (replace, files[i]) ; r.work (); } std::cout << "\rDone... " << std::endl << std::endl; std::chrono::duration<double > diff = std::chrono::system_clock::now () - start; std::cout << "\tUsed: " << diff.count () << " Second" << std::endl; return 0 ; }
replace.h
#ifndef REPLACE_H
#define REPLACE_H

#include <iostream>
#include <fstream>
#include <string>

/// Rewrites the text between <path> and </path> in one XML annotation file.
///
/// The new value is the replacement directory passed to the constructor
/// followed by the text found between <filename> and </filename> in the same
/// file. Construct one object per XML file and call work() on it.
class Replace
{
private:
    std::string str;    ///< Text of the XML file being processed.
    std::fstream f;     ///< Stream used to access the XML file.
    char buf[1024];     ///< Scratch buffer for reading the file.

private:
    std::string replace;    ///< Directory prefix written in front of the file name.
    std::string xmlname;    ///< Path of the XML file to modify.
    std::string filename;   ///< Text extracted from the <filename> element.

    std::string filename_label_start;   ///< Opening tag of the file-name element.
    std::string filename_label_end;     ///< Closing tag of the file-name element.
    std::string path_label_start;       ///< Opening tag of the path element.
    // NOTE(review): identifiers containing a double underscore are reserved in
    // C++; the two "__end" names are kept because replace.cpp refers to them.
    std::string path_label__end;        ///< Closing tag of the path element.

    int filename_pos_start;     ///< Offset of the opening file-name tag in str.
    int filename_pos_end;       ///< Offset of the closing file-name tag in str.
    int filenameLength;         ///< Length of the text between the file-name tags.
    int path_pos_start;         ///< Offset of the opening path tag in str.
    int path_pos__end;          ///< Offset of the closing path tag in str.
    int pathLength;             ///< Length of the text between the path tags.

public:
    /// Processes the XML file given to the constructor.
    /// @return false when the file could not be opened, true otherwise.
    bool work();

public:
    /// @param replace  Directory that replaces the directory part of the
    ///                 stored image path.
    /// @param xmlname  Path of the XML file to rewrite.
    Replace(const std::string& replace, const std::string& xmlname);
};

#endif // REPLACE_H
replace.cpp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 #include "replace.h" Replace::Replace (const std::string& replace, const std::string& xmlname) { this ->replace = replace; this ->xmlname = xmlname; filename_label_start = "<filename>" ; filename_label_end = "</filename>" ; path_label_start = "<path>" ; path_label__end = "</path>" ; filename_pos_start = 0 ; filename_pos_end = 0 ; filenameLength = 0 ; path_pos_start = 0 ; path_pos__end = 0 ; pathLength = 0 ; if (this ->replace.find_last_of ("\\" ) < this ->replace.length () - 1 ) { this ->replace.push_back ('\\' ); } } bool Replace::work () { f.open (xmlname.c_str ()); if (!f.is_open ()) { return false ; } while (!f.eof ()) { f.getline (buf, 1024 ); str.append (buf); str.append ("\n" ); } str.pop_back (); filename_pos_start = str.find (filename_label_start.c_str ()); filename_pos_end = str.find (filename_label_end.c_str ()); filenameLength = filename_pos_end - filename_pos_start - filename_label_start.length (); filename = str.substr (filename_pos_start + filename_label_start.length (), filenameLength); path_pos_start = str.find (path_label_start.c_str ()); path_pos__end = str.find (path_label__end.c_str ()); pathLength = path_pos__end - path_pos_start - path_label_start.length (); str.replace (path_pos_start + path_label_start.length (), pathLength, (replace + filename).c_str ()); f.clear (); f.seekp (0 , std::ios::beg); f << str; f.close (); return true ; }
getfiles.h
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 #include <iostream> #include <vector> #include <string> #include <io.h> void getFiles (std::string path, std::vector<std::string>& files) { long long hFile = 0 ; struct _finddata_t fileinfo; std::string p; if ((hFile = _findfirst(p.assign (path).append ("\\*" ).c_str (), &fileinfo)) != -1 ) { do { if ((fileinfo.attrib & _A_SUBDIR)) { if (strcmp (fileinfo.name,"." ) != 0 && strcmp (fileinfo.name,".." ) != 0 ) { getFiles (p.assign (path).append ("\\" ).append (fileinfo.name), files); } } else { std::string s = fileinfo.name; size_t lastPos = s.find_last_of ("." ); if (strcmp (s.substr (lastPos + 1 ).c_str (), "xml" ) == 0 ) { files.push_back (p.assign (path).append ("\\" ).append (fileinfo.name)); } } }while (_findnext(hFile, &fileinfo) == 0 ); _findclose(hFile); } }
仓库链接 使用方法 VocFilePathRepalce.exe [DataSet Path] [Replace Path]
VocFilePathRepalce.exe [数据集所在的文件路径] [替换xml中的图片路径]
例子:
.\VocFilePathRepalce.exe F:\DataSet\ D:\xyolo\images\train\
改进思路 使用了<io.h>这个库,这个库只在windows能稳定使用,在linux上用不了,要针对linux对获取xml文件的函数进行改进