YOLO Annotation (txt) to VOC Annotation (xml)
YOLO 학습 때 사용한 annotation 파일은 txt 형식이고, 간결하다.
darkflow로 학습하려니 annotation 형식이 달라서 학습이 되지 않았다.
xml 형식이 필요하대서 찾아보니까 txt에 비해 많이 복잡했다.
하나하나 다시 박스 처리를 해주는 건 미친 짓이기에 txt 파일을 xml로 변환했다.
나와 같은 어려움을 겪는 사람이 있을 게 분명하기에 코드를 공유해 봅니다.
from lxml import etree
from PIL import Image
import csv
import os
# ---- user configuration: adjust these paths to your own machine ----

# Directory that holds the training images.
IMG_PATH = 'D:/__Project__/darkflow-master/data/train_traffic_light'
# Directory that holds the YOLO (darknet) txt annotation files.
txt_folder = "D:/__Project__/darkflow-master/data/train_traffic_light_TXT"
# Prefix under which the generated xml files are saved.
save_path = "D:/__Project__/darkflow-master/data/annotations/"
# Label set for the dataset; edit it to match your classes.
labels = ["traffic light"]

# Names of all entries found in the image directory.
fw = os.listdir(IMG_PATH)
def csvread(fn):
    """Read a space-delimited text file into a list of rows.

    Args:
        fn: Path of the text file to read.

    Returns:
        A list with one entry per line of the file; each entry is the list
        of space-separated string fields found on that line. An empty file
        yields an empty list.
    """
    # newline='' lets the csv module handle line endings itself, as its
    # documentation recommends for files passed to csv.reader.
    with open(fn, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=' ')
        # Collect every parsed row; the file is closed on leaving the block.
        return list(reader)
def convert_label(txt_file, label_names=None):
    """Translate a YOLO class id into its label name.

    Args:
        txt_file: The class-id token (e.g. ``'0'``), a raw annotation line
            whose first whitespace-separated token is the class id, or an
            already-split row whose first field is the class id.
        label_names: Sequence of label names indexed by class id. When
            omitted, the module-level ``labels`` list is used.

    Returns:
        The label name stored at the class id's position.

    Raises:
        ValueError: If no class id is present, it is not an integer, or it
            is outside the range of ``label_names``.
    """
    if label_names is None:
        label_names = labels

    # Accept both a string and a pre-split row; take the whole first token
    # so multi-digit class ids are not cut down to their first character.
    fields = txt_file.split() if isinstance(txt_file, str) else txt_file
    if not fields:
        raise ValueError("annotation has no class id: %r" % (txt_file,))

    class_token = fields[0]
    try:
        class_id = int(class_token)
    except (TypeError, ValueError):
        raise ValueError("class id is not an integer: %r" % (class_token,)) from None

    # Fail loudly on ids with no configured label instead of leaving the
    # result undefined.
    if not 0 <= class_id < len(label_names):
        raise ValueError(
            "class id %d has no entry in the label set %r" % (class_id, list(label_names)))
    return label_names[class_id]
# core code = convert the yolo txt file to the x_min,x_max...
def extract_coor(txt_file, img_width, img_height):
    """Turn one YOLO annotation row into pixel-space box edges.

    Args:
        txt_file: One annotation row; fields 1-4 are read as the box
            centre x, centre y, width and height, each multiplied by the
            image size below.
        img_width: Image width used to scale the x values.
        img_height: Image height used to scale the y values.

    Returns:
        A tuple ``(x_min, x_max, y_min, y_max)`` of floats.
    """
    def _edges(centre, extent, scale):
        # min/max edge along one axis: (2*centre*scale -/+ extent*scale) / 2
        doubled_centre = 2 * centre * scale
        scaled_extent = extent * scale
        lower = (doubled_centre - scaled_extent) / 2
        upper = (doubled_centre + scaled_extent) / 2
        return lower, upper

    centre_x, centre_y, rel_width, rel_height = [
        float(txt_file[idx]) for idx in (1, 2, 3, 4)
    ]
    left, right = _edges(centre_x, rel_width, img_width)
    top, bottom = _edges(centre_y, rel_height, img_height)
    return left, right, top, bottom
# Convert the YOLO txt annotation of every image listed in fw into a
# Pascal VOC style xml file written under save_path.

# The <folder> value is the last component of IMG_PATH; it does not change
# between images, so compute it once.
# try debug to check your path
img_style = IMG_PATH.split('/')[-1]

for line in fw:
    img_name = line
    # splitext rather than line[:-4], so extensions that are not exactly
    # three characters long (e.g. ".jpeg") still give the right base name.
    img_stem = os.path.splitext(img_name)[0]
    image_info = IMG_PATH + "/" + img_name
    txt_path = txt_folder + "/" + img_stem + ".txt"
    txt_file = csvread(txt_path)

    # Read the image information. Open the image once and close it right
    # away so file handles do not pile up over a large dataset.
    with Image.open(image_info) as img:
        img_width, img_height = img.size
        # Band count works for every image format, whereas the .layers
        # attribute is only set by the JPEG loader.
        img_depth = len(img.getbands())

    # Every element is created as a child of its parent, so the whole tree
    # hangs off root and ends up in the serialized output.
    root = etree.Element("annotation")
    etree.SubElement(root, "folder").text = "%s" % (img_style)
    etree.SubElement(root, "filename").text = "%s" % (img_name)
    etree.SubElement(root, "path").text = "%s" % (IMG_PATH)

    ##################source - element##################
    source = etree.SubElement(root, "source")
    etree.SubElement(source, "database").text = "Unknown"

    ####################size - element##################
    size = etree.SubElement(root, "size")
    etree.SubElement(size, "width").text = "%d" % (img_width)
    etree.SubElement(size, "height").text = "%d" % (img_height)
    etree.SubElement(size, "depth").text = "%d" % (img_depth)

    etree.SubElement(root, "segmented").text = "0"

    for row in txt_file:
        # Blank lines in the txt file parse to empty rows; skip them.
        if not row:
            continue
        label = convert_label(row[0])
        x_min_rect, x_max_rect, y_min_rect, y_max_rect = extract_coor(
            row, img_width, img_height)

        ####################object - element##################
        # Named obj_elem so the builtin "object" is not shadowed.
        obj_elem = etree.SubElement(root, "object")
        etree.SubElement(obj_elem, "name").text = "%s" % (label)
        etree.SubElement(obj_elem, "pose").text = "Unspecified"
        etree.SubElement(obj_elem, "truncated").text = "0"
        etree.SubElement(obj_elem, "difficult").text = "0"
        bndbox = etree.SubElement(obj_elem, "bndbox")
        # "%d" truncates the float pixel coordinates to integers.
        etree.SubElement(bndbox, "xmin").text = "%d" % (x_min_rect)
        etree.SubElement(bndbox, "ymin").text = "%d" % (y_min_rect)
        etree.SubElement(bndbox, "xmax").text = "%d" % (x_max_rect)
        etree.SubElement(bndbox, "ymax").text = "%d" % (y_max_rect)

    file_output = etree.tostring(root, pretty_print=True, encoding='UTF-8')
    # tostring() returns bytes here; decode for the text-mode file. The
    # with-block guarantees the xml is flushed and the handle closed.
    with open('%s%s.xml' % (save_path, img_stem), 'w', encoding="utf-8") as ff:
        ff.write(file_output.decode('utf-8'))