In [0]:
# Import potrebných balíčkov
import os
import sys
from IPython.display import Image, clear_output
from PIL import Image as PilImage
import xml.etree.ElementTree as ET
import shutil

import re

# Raw string avoids the invalid "\." escape (a warning in modern Python);
# re.IGNORECASE also matches upper-case extensions such as ".JPG".
re_image = re.compile(r".+\.(jpeg|jpg|gif|png)$", re.IGNORECASE)
sys.path.append('yolov3')
In [2]:
# Dátovú množinu stiahneme z github repozitára
!git clone https://github.com/experiencor/raccoon_dataset
# Naklonujeme potrebný kód z github repozitára
!git clone https://github.com/ultralytics/yolov3   
Cloning into 'raccoon_dataset'...
remote: Enumerating objects: 646, done.
remote: Total 646 (delta 0), reused 0 (delta 0), pack-reused 646
Receiving objects: 100% (646/646), 48.00 MiB | 35.16 MiB/s, done.
Resolving deltas: 100% (412/412), done.
Cloning into 'yolov3'...
remote: Enumerating objects: 5368, done.
remote: Total 5368 (delta 0), reused 0 (delta 0), pack-reused 5368
Receiving objects: 100% (5368/5368), 5.99 MiB | 20.72 MiB/s, done.
Resolving deltas: 100% (3700/3700), done.
In [3]:
# Uistíme sa, že máme všetky potrebné dáta
!mkdir -p samples_raccoon
!wget -nc -O samples_raccoon/raccoon_example.jpg https://www.dropbox.com/s/qca8pk3x7ouvoo3/raccoon_example.jpg?dl=1
!mkdir -p outputs_raccoon

!mkdir -p samples_vehical
!wget -nc -O samples_vehical/vehical_example.jpg https://www.dropbox.com/s/c5097zbvr7xeibv/vehical_example.jpg?dl=1
!mkdir -p outputs_vehical

!wget -nc -O yolov3-spp.weights https://www.dropbox.com/s/ditxme19ikyggdt/yolov3-spp.weights?dl=1

!mkdir -p cfg; mkdir -p data; mkdir -p weights
!cp yolov3/data/coco.data data/coco.data; \
  cp yolov3/data/coco.names data/coco.names; \
  cp yolov3/cfg/yolov3-spp.cfg cfg/yolov3-spp.cfg
--2019-09-08 08:18:32--  https://www.dropbox.com/s/qca8pk3x7ouvoo3/raccoon_example.jpg?dl=1
Resolving www.dropbox.com (www.dropbox.com)... 162.125.3.1, 2620:100:6018:1::a27d:301
Connecting to www.dropbox.com (www.dropbox.com)|162.125.3.1|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /s/dl/qca8pk3x7ouvoo3/raccoon_example.jpg [following]
--2019-09-08 08:18:32--  https://www.dropbox.com/s/dl/qca8pk3x7ouvoo3/raccoon_example.jpg
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc9d3c984a05739942d4943db28d.dl.dropboxusercontent.com/cd/0/get/AoIB_moVgKblB9BbDTf5RDmFvBxnEppoTIvwTQnDgSq9e0XpJ6Nl5FRiHCCmBfzfzxeHu-qaJUxs6Sme99aLT2wKNxgQ4mpTpnecLDUs-eanR6azNeqsfb5s_QUrWgjNz0A/file?dl=1# [following]
--2019-09-08 08:18:33--  https://uc9d3c984a05739942d4943db28d.dl.dropboxusercontent.com/cd/0/get/AoIB_moVgKblB9BbDTf5RDmFvBxnEppoTIvwTQnDgSq9e0XpJ6Nl5FRiHCCmBfzfzxeHu-qaJUxs6Sme99aLT2wKNxgQ4mpTpnecLDUs-eanR6azNeqsfb5s_QUrWgjNz0A/file?dl=1
Resolving uc9d3c984a05739942d4943db28d.dl.dropboxusercontent.com (uc9d3c984a05739942d4943db28d.dl.dropboxusercontent.com)... 162.125.8.6, 2620:100:601b:6::a27d:806
Connecting to uc9d3c984a05739942d4943db28d.dl.dropboxusercontent.com (uc9d3c984a05739942d4943db28d.dl.dropboxusercontent.com)|162.125.8.6|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 58076 (57K) [application/binary]
Saving to: ‘samples_raccoon/raccoon_example.jpg’

samples_raccoon/rac 100%[===================>]  56.71K  --.-KB/s    in 0.02s   

2019-09-08 08:18:33 (3.68 MB/s) - ‘samples_raccoon/raccoon_example.jpg’ saved [58076/58076]

--2019-09-08 08:18:37--  https://www.dropbox.com/s/c5097zbvr7xeibv/vehical_example.jpg?dl=1
Resolving www.dropbox.com (www.dropbox.com)... 162.125.3.1, 2620:100:6018:1::a27d:301
Connecting to www.dropbox.com (www.dropbox.com)|162.125.3.1|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /s/dl/c5097zbvr7xeibv/vehical_example.jpg [following]
--2019-09-08 08:18:37--  https://www.dropbox.com/s/dl/c5097zbvr7xeibv/vehical_example.jpg
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://ucfd66cd131436eb715929e8ac1c.dl.dropboxusercontent.com/cd/0/get/AoKibFgT-7AiDXN-yYHsuuE8BBqTic_-4lIv0k0f8X9rXSdt1QAxMCgN85vID9ppkE2988bC9VrYjzTX-jMLR3ZuwJUJq6TVRtZ2B3-PGTvPOZQCOybUegw_QBqWDyAW7Xs/file?dl=1# [following]
--2019-09-08 08:18:38--  https://ucfd66cd131436eb715929e8ac1c.dl.dropboxusercontent.com/cd/0/get/AoKibFgT-7AiDXN-yYHsuuE8BBqTic_-4lIv0k0f8X9rXSdt1QAxMCgN85vID9ppkE2988bC9VrYjzTX-jMLR3ZuwJUJq6TVRtZ2B3-PGTvPOZQCOybUegw_QBqWDyAW7Xs/file?dl=1
Resolving ucfd66cd131436eb715929e8ac1c.dl.dropboxusercontent.com (ucfd66cd131436eb715929e8ac1c.dl.dropboxusercontent.com)... 162.125.3.6, 2620:100:6018:6::a27d:306
Connecting to ucfd66cd131436eb715929e8ac1c.dl.dropboxusercontent.com (ucfd66cd131436eb715929e8ac1c.dl.dropboxusercontent.com)|162.125.3.6|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 103777 (101K) [application/binary]
Saving to: ‘samples_vehical/vehical_example.jpg’

samples_vehical/veh 100%[===================>] 101.34K  --.-KB/s    in 0.03s   

2019-09-08 08:18:38 (3.88 MB/s) - ‘samples_vehical/vehical_example.jpg’ saved [103777/103777]

--2019-09-08 08:18:40--  https://www.dropbox.com/s/ditxme19ikyggdt/yolov3-spp.weights?dl=1
Resolving www.dropbox.com (www.dropbox.com)... 162.125.3.1, 2620:100:6018:1::a27d:301
Connecting to www.dropbox.com (www.dropbox.com)|162.125.3.1|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /s/dl/ditxme19ikyggdt/yolov3-spp.weights [following]
--2019-09-08 08:18:40--  https://www.dropbox.com/s/dl/ditxme19ikyggdt/yolov3-spp.weights
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://ucd225f3bbd37c6aad3d1c521095.dl.dropboxusercontent.com/cd/0/get/AoJFoL35pUKWmItfycRrFeKK0R7mu8RxTT3oDGq2ONYf5aXdr2Icn7e0PHzRP8dBJnduyx5XCDcw9KC1Rz-11zq76pvpUJ-SPdQVRURjATzjO78ttg-mt-zZ1l3m7QZkWxs/file?dl=1# [following]
--2019-09-08 08:18:41--  https://ucd225f3bbd37c6aad3d1c521095.dl.dropboxusercontent.com/cd/0/get/AoJFoL35pUKWmItfycRrFeKK0R7mu8RxTT3oDGq2ONYf5aXdr2Icn7e0PHzRP8dBJnduyx5XCDcw9KC1Rz-11zq76pvpUJ-SPdQVRURjATzjO78ttg-mt-zZ1l3m7QZkWxs/file?dl=1
Resolving ucd225f3bbd37c6aad3d1c521095.dl.dropboxusercontent.com (ucd225f3bbd37c6aad3d1c521095.dl.dropboxusercontent.com)... 162.125.8.6, 2620:100:601b:6::a27d:806
Connecting to ucd225f3bbd37c6aad3d1c521095.dl.dropboxusercontent.com (ucd225f3bbd37c6aad3d1c521095.dl.dropboxusercontent.com)|162.125.8.6|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 252209544 (241M) [application/binary]
Saving to: ‘yolov3-spp.weights’

yolov3-spp.weights  100%[===================>] 240.53M  43.3MB/s    in 5.4s    

2019-09-08 08:18:47 (44.4 MB/s) - ‘yolov3-spp.weights’ saved [252209544/252209544]

In [0]:
#@title --- Pomocný zdrojový kód ---
from IPython.core.magic import register_line_cell_magic
import cv2
import random
from utils.utils import plot_one_box
from google.colab.patches import cv2_imshow

def show_images(directory):
    """Display every image file (per `re_image`) in `directory`, 600 px wide."""
    for entry in os.listdir(directory):
        if not re_image.match(entry):
            continue
        print("{}:".format(entry))
        display(Image(filename=os.path.join(directory, entry), width=600))

@register_line_cell_magic
def writetemplate(line, cell):
    """Cell magic: render the cell body with str.format over the global
    namespace and write the result to the file named on the magic line."""
    rendered = cell.format(**globals())
    with open(line, 'w') as target:
        target.write(rendered)
        
def show_annot_image(img_filename, annot_filename, classes):
    """Draw YOLO-format annotations onto an image and display it inline.

    Args:
        img_filename: path of the image file.
        annot_filename: path of the YOLO annotation file; each non-empty
            line must be `class_id x_center y_center width height` with
            coordinates normalized to the image size.
        classes: sequence of class names indexed by class_id.

    Raises:
        RuntimeError: if an annotation line does not have 5 fields.
        FileNotFoundError: if the image cannot be read.
    """
    # One random BGR color per class so boxes are visually distinct.
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]
    objs = []

    with open(annot_filename) as file:
        for line in file.readlines():
            line = line.strip()

            if not len(line):
                continue

            parts = line.split(' ')
            if len(parts) != 5:
                raise RuntimeError('The annotation file is malformed.')

            c, [xc, yc, w, h] = int(parts[0]), [float(xx) for xx in parts[1:]]
            objs.append((c, xc, yc, w, h))

    img = cv2.imread(img_filename)
    if img is None:
        # cv2.imread returns None (instead of raising) when the file is
        # missing or unreadable; fail loudly with a clear message rather
        # than crashing later on `img.shape`.
        raise FileNotFoundError('Cannot read image: {}'.format(img_filename))
    width, height = img.shape[1], img.shape[0]

    for (c, xc, yc, w, h) in objs:
        # Convert normalized center/size back to absolute corner coordinates.
        xmin = (xc - w/2) * width
        ymin = (yc - h/2) * height
        xmax = (xc + w/2) * width
        ymax = (yc + h/2) * height
        plot_one_box((xmin, ymin, xmax, ymax),
                     img, label=classes[c],
                     color=colors[c])

    cv2_imshow(img)
        
YOLO_CFG = """
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=16
subdivisions=1
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate={LEARNING_RATE}
burn_in={BURN_IN}
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

# Downsample

[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

######################

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters={YOLO_FILTERS}
activation=linear


[yolo]
mask = 6,7,8
anchors = {ANCHORS}
classes={NUM_CLASSES}
num={YOLO_NUM}
jitter=.3
ignore_thresh = {IGNORE_THRESH}
truth_thresh = 1
random=1


[route]
layers = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

[route]
layers = -1, 61



[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters={YOLO_FILTERS}
activation=linear


[yolo]
mask = 3,4,5
anchors = {ANCHORS}
classes={NUM_CLASSES}
num={YOLO_NUM}
jitter=.3
ignore_thresh = {IGNORE_THRESH}
truth_thresh = 1
random=1



[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride={STRIDE}

[route]
layers = {ROUTE_LAYERS}


[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters={YOLO_FILTERS}
activation=linear

[yolo]
mask = 0,1,2
anchors = {ANCHORS}
classes={NUM_CLASSES}
num={YOLO_NUM}
jitter=.3
ignore_thresh = {IGNORE_THRESH}
truth_thresh = 1
random=1
"""

Vizuálna detekcia objektov pomocou YOLOv3

V tomto notebook-u si uvedieme jednoduchý praktický príklad na vizuálnu detekciu objektov pomocou metódy YOLOv3. Použijeme existujúcu implementáciu metódy z GitHub repozitára https://github.com/ultralytics/yolov3. Implementácia nevyužíva balíčky keras a tensorflow, s ktorými pracujeme zvyčajne, ale balíček torch. Keďže sú však všetky funkcie, s ktorými budeme pracovať, obalené v ďalších rozhraniach špecifických pre YOLO, nebude na tom príliš záležať.

Detekcia objektov pomocou predtrénovanej siete

Na začiatok využijeme sieť s váhami predtrénovanými na dátovej množine COCO. Ide o známu dátovú množinu určenú na vizuálnu detekciu objektov.

Súbory, ktoré chceme otestovať, sú uložené v adresári samples_vehical. Môžeme medzi ne v tomto kroku pridať aj ľubovoľné ďalšie súbory. Na začiatok si ich zobrazme:

In [5]:
show_images("samples_vehical")
vehical_example.jpg:

Výstupom detekcie budú súbory s vyznačenými objektami, ktoré sa uložia v priečinku output_vehical. V prípade, že výsledky detekcie plánujeme ďalej použiť, je samozrejme možné informáciu o objektoch a ich pozícii v obraze získať aj vo forme textového súboru alebo – po miernej úprave zdrojového kódu – ju použiť aj ľubovoľným iným spôsobom.

Aby sme vykonali detekciu, spustíme skript detect.py z priečinka yolov3 s príslušnými argumentmi:

In [6]:
!{sys.executable} yolov3/detect.py                 \
  --source samples_vehical                         \
  --output output_vehical                          \
  --cfg cfg/yolov3-spp.cfg --data data/coco.data   \
  --weight yolov3-spp.weights                      \
  --img-size 608
Namespace(cfg='cfg/yolov3-spp.cfg', conf_thres=0.3, data='data/coco.data', fourcc='mp4v', half=False, img_size=608, nms_thres=0.5, output='output_vehical', source='samples_vehical', weights='yolov3-spp.weights')
Using CUDA device0 _CudaDeviceProperties(name='Tesla K80', total_memory=11441MB)

image 1/1 samples_vehical/vehical_example.jpg: 352x608 1 persons, 10 cars, 1 motorcycles, 5 traffic lights, Done. (0.286s)
Results saved to /content/output_vehical
Done. (0.350s)

Výsledok detekcie si opäť môžeme zobraziť pomocou pomocnej funkcie show_images:

In [7]:
show_images("output_vehical")
vehical_example.jpg:

Detekcia iných typov objektov

Dátová množina COCO obsahuje 80 rôznych typov objektov. Nie je to málo, ale je to podstatne menej než napríklad v prípade dátovej množiny ImageNet, ktorá je určená na klasifikáciu. Ak skúsime predtrénovanú sieť aplikovať aj na ďalšie obrázky, s veľkou pravdepodobnosťou narazíme na triedy, ktoré sieť nebude poznať:

In [8]:
!{sys.executable} yolov3/detect.py                 \
  --source samples_raccoon                         \
  --output output_raccoon                          \
  --cfg cfg/yolov3-spp.cfg --data data/coco.data   \
  --weight yolov3-spp.weights                      \
  --img-size 608
Namespace(cfg='cfg/yolov3-spp.cfg', conf_thres=0.3, data='data/coco.data', fourcc='mp4v', half=False, img_size=608, nms_thres=0.5, output='output_raccoon', source='samples_raccoon', weights='yolov3-spp.weights')
Using CUDA device0 _CudaDeviceProperties(name='Tesla K80', total_memory=11441MB)

image 1/1 samples_raccoon/raccoon_example.jpg: 416x608 1 bears, Done. (0.126s)
Results saved to /content/output_raccoon
Done. (0.140s)

Ako uvidíme, na obrázku s medvedíkom čistotným (raccoon) bude síce medvedík čistotný označený, ale bude nesprávne klasifikovaný ako medveď (bear), keďže táto trieda sa sieti zdá najbližšia.

In [9]:
show_images("output_raccoon")
raccoon_example.jpg:

Tréning YOLOv3 z vlastných dát

Ďalej si ukážeme, ako je možné sieť YOLOv3 natrénovať na vlastnej dátovej množine. Použijeme dátovú množinu s obrázkami medvedíka čistotného z repozitára raccoon dataset.

Keďže existuje väčšie množstvo formátov, ktoré sa používajú pri anotovaní dát pre vizuálnu detekciu, pri práci s vlastnou dátovou množinou typicky narazíme na to, že anotácie sú v inom formáte, než aký podporuje implementácia, s ktorou pracujeme. Anotácie preto môže byť potrebné prekódovať. Našťastie ide o pomerne jednoduchú úlohu: formáty sú najčastejšie založené na dobre známych typoch štruktúrovaných súborov, ako sú XML, JSON alebo CSV. Existuje veľa balíčkov, ktoré ich umožňujú načítať a ukladať.

Formáty Pascal VOC a YOLO

V rámci našej dátovej množiny sú anotácie uložené vo formáte Pascal VOC. Ide o formát založený na jazyku XML. Anotácia môže vyzerať napríklad takto:

<annotation verified="yes">
    <folder>images</folder>
    <filename>raccoon-1.jpg</filename>
    <path>/Users/datitran/Desktop/raccoon/images/raccoon-1.jpg</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>650</width>
        <height>417</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>raccoon</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>81</xmin>
            <ymin>88</ymin>
            <xmax>522</xmax>
            <ymax>408</ymax>
        </bndbox>
    </object>
</annotation>

Každému obrázku zodpovedá jeden XML súbor s anotáciami. Každý objekt je anotovaný svojou triedou (atribút name) a koordinátmi ohraničujúceho obdĺžnika (xmin, xmax, ymin, ymax).

Naša implementácia pracuje namiesto toho s anotáciami vo formáte YOLO:

0 0.463846 0.594724 0.678462 0.767386

Tu je každý objekt anotovaný vo formáte

identifikátor_triedy horizontálny_stred vertikálny_stred šírka výška

pričom identifikátorom triedy je jej poradové číslo (zoznam tried je v osobitnom súbore), ďalej nasledujú koordináty stredu ohraničujúceho obdĺžnika a jeho šírka a výška. Koordináty, šírka aj výška sú normalizované vo vzťahu ku veľkosti obrázka.

Konverzia anotácií z formátu Pascal VOC do YOLO

Ďalej pokračujme tým, že špecifikujeme cesty ku jednotlivým častiam dátovej množiny: ku obrázkom, ku anotáciám vo formáte Pascal VOC a ku priečinku, kde sa majú uložiť nové anotácie vo formáte YOLO. Ak posledne zmienený priečinok ešte neexistuje, vytvoríme ho.

In [0]:
xml_path = "raccoon_dataset/annotations"
img_path = "raccoon_dataset/images"
labels_path = "raccoon_dataset/labels"

# uistíme sa, že priečinok existuje
!mkdir -p {labels_path}

Ďalej si musíme zadefinovať funkciu, ktorá z Pascal VOC súboru extrahuje záznamy o objektoch v štruktúre podľa formátu YOLO:

In [0]:
def extract_yolo_annot(xml_filename):
    """Convert a single Pascal VOC XML annotation file to YOLO format.

    Parameters:
        xml_filename: path to the Pascal VOC XML annotation file.

    Returns:
        (annotations, classes) where ``annotations`` is a list of tuples
        (class_name, x_center, y_center, width, height) with the centre
        coordinates and box size normalized to [0, 1] relative to the
        image dimensions, and ``classes`` is the set of class names
        encountered in the file.

    Raises:
        xml.etree.ElementTree.ParseError: if the file is not valid XML.
    """
    classes = set()
    annotations = []
    tree = ET.parse(xml_filename)
    root = tree.getroot()

    img_width = int(float(root.find('size/width').text))
    img_height = int(float(root.find('size/height').text))

    for member in root.findall('object'):
        c = member.find('name').text
        classes.add(c)

        # Extract the bounding-box corners.  Some Pascal VOC exports store
        # the coordinates as floats (e.g. "81.0"), so parse through float
        # first instead of failing on int("81.0").
        xmin = int(float(member.find('bndbox/xmin').text))
        xmax = int(float(member.find('bndbox/xmax').text))
        ymin = int(float(member.find('bndbox/ymin').text))
        ymax = int(float(member.find('bndbox/ymax').text))

        # Clamp the coordinates to the image: never below 0,
        # never beyond the image dimensions.
        xmin = max(0, min(xmin, img_width))
        xmax = max(0, min(xmax, img_width))
        ymin = max(0, min(ymin, img_height))
        ymax = max(0, min(ymax, img_height))

        # Make sure that xmin <= xmax and ymin <= ymax.
        xmin, xmax = min(xmin, xmax), max(xmin, xmax)
        ymin, ymax = min(ymin, ymax), max(ymin, ymax)

        # Compute the box centre and size, normalized to the image size.
        xc = (xmin + xmax) / 2 / img_width
        yc = (ymin + ymax) / 2 / img_height
        w = (xmax - xmin) / img_width
        h = (ymax - ymin) / img_height

        annotations.append((c, xc, yc, w, h))

    # Return the annotations together with the set of all classes seen.
    return annotations, classes

Vytvorenú funkciu aplikujeme na všetky súbory a výsledky uložíme v osobitnom priečinku. Vytvoríme si pritom aj zoznam všetkých tried:

In [0]:
file_annots = []
classes = set()

# Convert every Pascal VOC XML file in the annotation folder.
for filename in os.listdir(xml_path):
    basename, ext = os.path.splitext(filename)
    if ext == '.xml':
        annotations, cl = extract_yolo_annot(os.path.join(xml_path, filename))
        file_annots.append((basename, annotations))
        classes.update(cl)

# Sort the class names before assigning numeric ids.  Iterating the set
# directly would make the ids depend on Python's hash randomization, so
# label files written in different runs could disagree with each other
# (and with data/raccoon.names).
class_list = sorted(classes)
class_dict = {c: i for i, c in enumerate(class_list)}
NUM_CLASSES = len(class_list)

# Write one YOLO-format label file per image:
# "class_id x_center y_center width height" per object.
for basename, annotations in file_annots:
    with open(os.path.join(labels_path, basename + ".txt"), "w") as file:
        for annot in annotations:
            file.write("{} {:.6f} {:.6f} {:.6f} {:.6f}\n".format(
                class_dict[annot[0]], *annot[1:]))

Aby sme sa uistili, že sme konverziu implementovali správne, môžeme si zobraziť výsledné anotácie pre jeden obrázok:

In [13]:
# Sanity check: draw the converted YOLO annotation over one sample image.
sample = "raccoon-1"
show_annot_image(os.path.join(img_path, sample + ".jpg"),
                 os.path.join(labels_path, sample + ".txt"),
                 ["raccoon"])

Zoznam obrázkov a rozdelenie dátovej množiny

Ďalej si vytvoríme zoznam všetkých obrázkov, ktoré sú k dispozícii a rozdelíme si ich na tréningové a validačné.

In [0]:
# Number of images held out for validation (name the magic number so the
# split is easy to find and tune).
N_VALID = 8

img_filenames = []

# Collect all image files; sorting gives a deterministic ordering, so the
# train/validation split is reproducible across runs.
for filename in sorted(os.listdir(img_path)):
    if re_image.match(filename):
        img_filenames.append(filename)

# The last N_VALID images form the validation set; the rest are training.
train_filenames = img_filenames[:-N_VALID]
valid_filenames = img_filenames[-N_VALID:]

Vytvorenie konfiguračných súborov

Ďalej je potrebné vytvoriť niekoľko konfiguračných súborov. Ako prvé vytvoríme zoznamy tréningových a validačných obrázkov:

In [0]:
# Write the training and validation image lists, one image path per line.
for list_path, fnames in (("data/raccoon_train.txt", train_filenames),
                          ("data/raccoon_valid.txt", valid_filenames)):
    with open(list_path, "w") as file:
        for fname in fnames:
            file.write(os.path.join(img_path, fname) + "\n")

Zoznam všetkých tried zapíšeme do osobitného súboru:

In [0]:
# Write one class name per line; the line number (counted from 0)
# is the class id used in the label files.
with open("data/raccoon.names", "w") as file:
    file.writelines(c + "\n" for c in class_list)

Vytvoríme hlavný súbor, ktorý sa odkazuje na jednotlivé konfiguračné súbory.

In [0]:
%%writetemplate data/raccoon.data
classes={NUM_CLASSES}
train=./data/raccoon_train.txt
valid=./data/raccoon_valid.txt
names=./data/raccoon.names
backup=backup/
eval=raccoon

Pomocou posledného súboru konfigurujeme architektúru a hyperparametre siete. Keďže súbor je veľmi dlhý, vytvoríme ho podľa preddefinovanej šablóny a meniť budeme len niektoré parametre:

In [0]:
# Learning-rate warm-up: number of initial iterations during which the
# learning rate is ramped up (darknet "burn_in" parameter — presumably
# substituted into the cfg template; confirm against YOLO_CFG).
BURN_IN = 1000

# Anchor box sizes as width,height pairs in pixels (9 anchors, 3 per
# detection scale in the cfg template).
ANCHORS = "10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326"
# Number of anchors listed in ANCHORS.
YOLO_NUM = 9

# Filters in the conv layer preceding each YOLO layer:
# 3 anchors per scale x (4 box coords + 1 objectness + NUM_CLASSES scores).
YOLO_FILTERS = (4 + 1 + NUM_CLASSES) * 3
# Layer indices joined by a route layer in the cfg template.
ROUTE_LAYERS = "-1, 36"
# Upsampling stride used in the cfg template.
STRIDE = 2
# Network input resolution (square, in pixels).
IMG_SIZE= 416
# IoU threshold — NOTE(review): presumably darknet's "ignore_thresh"
# (predictions above it are excluded from the objectness loss); confirm.
IGNORE_THRESH = 0.5
# Initial learning rate.
LEARNING_RATE = 1e-4
In [0]:
with open("cfg/raccoon_yolo.cfg", "w") as file:
    file.write(YOLO_CFG.format(**globals()))

Tréning vizuálneho detektora

Po vytvorení všetkých potrebných konfiguračných súborov sme pripravení natrénovať detektor. Špecifikovať treba najmä počet epoch a veľkosť dávky. Okrem toho chceme jadro siete inicializovať pomocou váh predtrénovaných na dátovej množine ImageNet, preto špecifikujeme, že sa majú použiť váhy "darknet53.conv.74". Súbor s váhami sa stiahne automaticky.

In [20]:
# Launch YOLOv3 training with the current Python interpreter.
#   --weights darknet53.conv.74 : ImageNet-pretrained backbone weights
#                                 (downloaded automatically by train.py)
#   --cache-images              : keep images in memory between epochs
#   --rect                      : rectangular (non-square) training batches
#   --accumulate=1              : apply the optimizer step after every batch
!{sys.executable} yolov3/train.py          \
    --batch-size 25 --epochs 40            \
    --weights "darknet53.conv.74"          \
    --img-size {IMG_SIZE} --cache-images   \
    --cfg cfg/raccoon_yolo.cfg             \
    --data data/raccoon.data               \
    --rect --accumulate=1      
Namespace(accumulate=1, arc='defaultpw', batch_size=25, bucket='', cache_images=True, cfg='cfg/raccoon_yolo.cfg', data='data/raccoon.data', epochs=40, evolve=False, img_size=416, img_weights=False, multi_scale=False, nosave=False, notest=False, prebias=False, rect=True, resume=False, transfer=False, var=None, weights='darknet53.conv.74')
Using CUDA device0 _CudaDeviceProperties(name='Tesla K80', total_memory=11441MB)

Downloading https://pjreddie.com/media/files/darknet53.conv.74
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  154M  100  154M    0     0   636k      0  0:04:09  0:04:09 --:--:--  498k
Reading image shapes: 100% 192/192 [00:00<00:00, 3800.59it/s]
Reading labels (192 found, 0 missing, 0 empty for 192 images): 100% 192/192 [00:00<00:00, 12945.80it/s]
Reading images: 100% 192/192 [00:00<00:00, 269.83it/s]
Model Summary: 222 layers, 6.15237e+07 parameters, 6.15237e+07 gradients
Starting training for 40 epochs...

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
      0/39     4.01G      1.89      2.94         0      4.83        19       416: 100% 8/8 [01:04<00:00,  8.94s/it]
Reading image shapes: 100% 8/8 [00:00<00:00, 3088.02it/s]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:03<00:00,  3.77s/it]
                 all         8         8         0         0         0         0

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
      1/39     7.47G      2.16      2.38         0      4.54        19       416: 100% 8/8 [00:15<00:00,  2.03s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  2.16it/s]
                 all         8         8         0         0         0         0

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
      2/39     10.7G      1.77      2.13         0       3.9        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  2.10it/s]
                 all         8         8         0         0         0         0

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
      3/39     10.7G      1.71      1.95         0      3.66        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.22it/s]
                 all         8         8    0.0234      0.75    0.0393    0.0455

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
      4/39     10.7G      1.75      1.78         0      3.53        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:01<00:00,  1.39s/it]
                 all         8         8    0.0076     0.625   0.00886     0.015

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
      5/39     10.7G      1.57       1.7         0      3.27        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:01<00:00,  1.28s/it]
                 all         8         8   0.00838     0.625    0.0322    0.0165

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
      6/39     10.7G       1.4      1.63         0      3.04        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.43it/s]
                 all         8         8    0.0373      0.75    0.0711     0.071

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
      7/39     10.7G      1.37      1.57         0      2.93        19       416: 100% 8/8 [00:15<00:00,  1.94s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.68it/s]
                 all         8         8    0.0361     0.375    0.0241    0.0659

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
      8/39     10.7G       1.3      1.53         0      2.83        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.64it/s]
                 all         8         8    0.0526     0.625     0.189    0.0971

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
      9/39     10.7G      1.33      1.43         0      2.76        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.64it/s]
                 all         8         8    0.0575     0.625    0.0955     0.105

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     10/39     10.7G      1.16      1.41         0      2.57        19       416: 100% 8/8 [00:15<00:00,  1.94s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.77it/s]
                 all         8         8     0.135     0.625     0.475     0.222

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     11/39     10.7G      1.12      1.33         0      2.46        19       416: 100% 8/8 [00:15<00:00,  1.95s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.85it/s]
                 all         8         8     0.211         1     0.815     0.348

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     12/39     10.7G      1.14      1.29         0      2.43        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.95it/s]
                 all         8         8       0.2         1     0.863     0.333

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     13/39     10.7G      1.23      1.24         0      2.47        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.96it/s]
                 all         8         8     0.364         1     0.959     0.533

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     14/39     10.7G      1.09      1.21         0       2.3        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.97it/s]
                 all         8         8     0.364         1     0.972     0.533

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     15/39     10.7G      1.14      1.17         0      2.31        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.88it/s]
                 all         8         8     0.533         1     0.863     0.696

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     16/39     10.7G     0.988      1.18         0      2.17        19       416: 100% 8/8 [00:15<00:00,  1.92s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.82it/s]
                 all         8         8       0.5         1     0.904     0.667

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     17/39     10.7G     0.989      1.07         0      2.06        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.91it/s]
                 all         8         8     0.471         1     0.986      0.64

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     18/39     10.7G      1.01      1.05         0      2.07        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.96it/s]
                 all         8         8     0.533         1         1     0.696

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     19/39     10.7G     0.901      1.03         0      1.93        19       416: 100% 8/8 [00:15<00:00,  1.92s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  2.02it/s]
                 all         8         8     0.533         1     0.986     0.696

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     20/39     10.7G     0.934     0.978         0      1.91        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  2.04it/s]
                 all         8         8       0.5         1     0.975     0.667

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     21/39     10.7G     0.957     0.971         0      1.93        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.98it/s]
                 all         8         8       0.4         1         1     0.571

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     22/39     10.7G     0.878     0.972         0      1.85        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  2.02it/s]
                 all         8         8     0.533         1         1     0.696

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     23/39     10.7G     0.837      0.89         0      1.73        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.99it/s]
                 all         8         8       0.4         1     0.975     0.571

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     24/39     10.7G     0.852     0.807         0      1.66        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  2.03it/s]
                 all         8         8     0.571         1     0.975     0.727

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     25/39     10.7G     0.771     0.835         0      1.61        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.95it/s]
                 all         8         8     0.533         1         1     0.696

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     26/39     10.7G     0.826     0.762         0      1.59        19       416: 100% 8/8 [00:15<00:00,  1.94s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  2.01it/s]
                 all         8         8     0.615         1         1     0.762

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     27/39     10.7G     0.794     0.746         0      1.54        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  2.01it/s]
                 all         8         8     0.889         1         1     0.941

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     28/39     10.7G     0.824     0.768         0      1.59        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  2.07it/s]
                 all         8         8     0.889         1         1     0.941

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     29/39     10.7G     0.766     0.809         0      1.58        19       416: 100% 8/8 [00:15<00:00,  1.94s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.97it/s]
                 all         8         8     0.615         1     0.972     0.762

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     30/39     10.7G      0.83     0.723         0      1.55        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  2.02it/s]
                 all         8         8     0.889         1         1     0.941

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     31/39     10.7G     0.882     0.706         0      1.59        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  2.03it/s]
                 all         8         8     0.636     0.875     0.781     0.737

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     32/39     10.7G     0.873     0.636         0      1.51        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  2.00it/s]
                 all         8         8       0.5     0.875     0.781     0.636

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     33/39     10.7G     0.844     0.638         0      1.48        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.98it/s]
                 all         8         8     0.583     0.875     0.812       0.7

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     34/39     10.7G      0.72     0.644         0      1.36        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.99it/s]
                 all         8         8     0.583     0.875     0.797       0.7

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     35/39     10.7G     0.733     0.603         0      1.34        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.94it/s]
                 all         8         8     0.538     0.875     0.797     0.667

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     36/39     10.7G     0.732     0.591         0      1.32        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  2.01it/s]
                 all         8         8     0.583     0.875     0.797       0.7

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     37/39     10.7G     0.694     0.598         0      1.29        19       416: 100% 8/8 [00:15<00:00,  1.93s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.99it/s]
                 all         8         8     0.538     0.875     0.797     0.667

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     38/39     10.7G     0.694      0.62         0      1.31        19       416: 100% 8/8 [00:15<00:00,  1.92s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.96it/s]
                 all         8         8     0.538     0.875     0.797     0.667

     Epoch   gpu_mem      GIoU       obj       cls     total   targets  img_size
     39/39     10.7G     0.712     0.577         0      1.29        19       416: 100% 8/8 [00:15<00:00,  1.94s/it]
               Class    Images   Targets         P         R       mAP        F1: 100% 1/1 [00:00<00:00,  1.49it/s]
                 all         8         8    0.0383     0.875     0.797    0.0733
40 epochs completed in 0.220 hours.

Testovanie detektora

V ďalšom kroku si validačné obrázky okopírujeme do osobitného adresára a skúsime na ne aplikovať výsledný vizuálny detektor:

In [0]:
!mkdir -p raccoon_test; mkdir -p raccoon_res

for fname in valid_filenames:
    shutil.copy(os.path.join(img_path, fname),
                os.path.join("raccoon_test", fname))
In [22]:
# Run detection with the best checkpoint saved during training
# (weights/best.pt); annotated images are written to raccoon_res.
!{sys.executable} yolov3/detect.py         \
    --weight weights/best.pt               \
    --img-size {IMG_SIZE}                  \
    --source=raccoon_test                  \
    --output raccoon_res                   \
    --cfg cfg/raccoon_yolo.cfg             \
    --data data/raccoon.data
Namespace(cfg='cfg/raccoon_yolo.cfg', conf_thres=0.3, data='data/raccoon.data', fourcc='mp4v', half=False, img_size=416, nms_thres=0.5, output='raccoon_res', source='raccoon_test', weights='weights/best.pt')
Using CUDA device0 _CudaDeviceProperties(name='Tesla K80', total_memory=11441MB)

image 1/8 raccoon_test/raccoon-92.jpg: 288x416 1 raccoons, Done. (0.082s)
image 2/8 raccoon_test/raccoon-93.jpg: 352x416 1 raccoons, Done. (0.083s)
image 3/8 raccoon_test/raccoon-94.jpg: 288x416 1 raccoons, Done. (0.073s)
image 4/8 raccoon_test/raccoon-95.jpg: 416x352 1 raccoons, Done. (0.073s)
image 5/8 raccoon_test/raccoon-96.jpg: 416x416 1 raccoons, Done. (0.085s)
image 6/8 raccoon_test/raccoon-97.jpg: 352x416 1 raccoons, Done. (0.067s)
image 7/8 raccoon_test/raccoon-98.jpg: 320x416 1 raccoons, Done. (0.066s)
image 8/8 raccoon_test/raccoon-99.jpg: 384x416 1 raccoons, Done. (0.066s)
Results saved to /content/raccoon_res
Done. (0.668s)

Výsledok si zobrazíme:

In [23]:
show_images("raccoon_res")
raccoon-95.jpg:
raccoon-96.jpg:
raccoon-94.jpg:
raccoon-98.jpg:
raccoon-99.jpg:
raccoon-92.jpg:
raccoon-93.jpg:
raccoon-97.jpg:

Anotácia obrázkov

Zatiaľ čo pri klasifikácii sa každému obrázku priraďuje len trieda, pri vizuálnej detekcii môže každý obrázok obsahovať jeden alebo viacero objektov a udáva sa nielen ich trieda, ale aj pozícia (väčšinou vo forme ohraničujúceho obdĺžnika) v obraze. Na anotáciu obrázkov pri vytváraní dátovej množiny je preto potrebné použiť špeciálny nástroj, ktorý umožní objekty zodpovedajúcim spôsobom označiť.

Takých nástrojov existuje samozrejme väčšie množstvo – napríklad nástroj VIA od skupiny VGG založený na HTML a javascripte. Existuje viacero formátov, v ktorých môžu byť anotácie uložené – najčastejšie sú založené na súboroch typu XML, JSON alebo CSV. V praxi je typicky potrebné napísať jednoduchý kód, ktorý skonvertuje anotácie do formátu, ktorý podporuje systém na vizuálnu detekciu.

In [0]: