Skip to content

Commit

Permalink
Merge branch 'enhancement/testing2' into charades
Browse files Browse the repository at this point in the history
  • Loading branch information
zhong-al committed Dec 14, 2024
2 parents b44ad9e + f56b4cd commit 8fe4de0
Show file tree
Hide file tree
Showing 18 changed files with 1,469 additions and 22 deletions.
34 changes: 34 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: Test  # CI workflow: installs the package and runs the unit-test modules

on:
  push:  # no branch/path filters, so every push triggers the workflow

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10"]  # quoted so YAML keeps the string "3.10" instead of the float 3.1
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies  # hatchling is installed first because --no-build-isolation skips the isolated build env
        run: |
          python -m pip install --upgrade pip setuptools wheel
          pip install -r requirements.txt
          python -m pip install hatchling
          python -m pip install --no-build-isolation .
      - name: Running unit tests  # each test module runs as its own unittest invocation
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN_TESTING }}  # presumably lets tests fetch assets from Hugging Face; confirm against tests/
        run: |
          python -m unittest tests/test_cvat2slowfast.py
          python -m unittest tests/test_cvat2ultralytics.py
          python -m unittest tests/test_detector2cvat.py
          python -m unittest tests/test_miniscene2behavior.py
          python -m unittest tests/test_player.py
          python -m unittest tests/test_tracks_extractor.py
13 changes: 13 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -177,5 +177,18 @@ cython_debug/
# Mac System
.DS_Store

# Tool output
*.json
*.xml
*.jpg
*.yaml
*.csv
*.txt

# Model files
*.pyth
*.pyth.zip
*.yml


helper_scripts/mini-scenes
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,6 @@ player --folder path_to_folder [--save] [--imshow]


```
cvat2slowfast --miniscene path_to_mini_scenes --dataset dataset_name --classes path_to_classes_json [--old2new path_to_old2new_json]
cvat2slowfast --miniscene path_to_mini_scenes --dataset dataset_name --classes path_to_classes_json [--old2new path_to_old2new_json] [--no_images]
```

6 changes: 6 additions & 0 deletions ethogram/label2index.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"Grevy": 0,
"Zebra": 0,
"Baboon": 1,
"Giraffe": 2
}
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ ultralytics~=8.0.36
pandas>=1.3.5
pillow==10.4.0
scikit-learn==1.5.1
huggingface_hub
28 changes: 17 additions & 11 deletions src/kabr_tools/cvat2ultralytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import argparse
import json
import cv2
import ruamel.yaml as yaml
from ruamel.yaml import YAML
from lxml import etree
from collections import OrderedDict
from tqdm import tqdm
Expand Down Expand Up @@ -39,8 +39,10 @@ def cvat2ultralytics(video_path: str, annotation_path: str,
shutil.rmtree(f"{dataset}")

with open(f"{dataset}.yaml", "w") as file:
yaml.dump(yaml.load(dataset_file, Loader=yaml.RoundTripLoader, preserve_quotes=True),
file, Dumper=yaml.RoundTripDumper)
yaml = YAML(typ='rt')
yaml.preserve_quotes = True
data = yaml.load(dataset_file)
yaml.dump(data, file)

if not os.path.exists(f"{dataset}/images/train"):
os.makedirs(f"{dataset}/images/train")
Expand All @@ -57,6 +59,7 @@ def cvat2ultralytics(video_path: str, annotation_path: str,

if label2index is None:
label2index = {
"Grevy": 0,
"Zebra": 0,
"Baboon": 1,
"Giraffe": 2
Expand All @@ -69,21 +72,24 @@ def cvat2ultralytics(video_path: str, annotation_path: str,
for root, dirs, files in os.walk(annotation_path):
for file in files:
video_name = os.path.join(video_path + root[len(annotation_path):], os.path.splitext(file)[0])

if os.path.exists(video_name + ".MP4"):
videos.append(video_name + ".MP4")
else:
videos.append(video_name + ".mp4")

annotations.append(os.path.join(root, file))
if file.endswith(".xml"):
if os.path.exists(video_name + ".MP4"):
videos.append(video_name + ".MP4")
else:
videos.append(video_name + ".mp4")
annotations.append(os.path.join(root, file))

for i, (video, annotation) in enumerate(zip(videos, annotations)):
print(f"{i + 1}/{len(annotations)}:")
print(f"{i + 1}/{len(annotations)}:", flush=True)

if not os.path.exists(video):
print(f"Path {video} does not exist.")
continue

if not os.path.exists(annotation):
print(f"Path {annotation} does not exist.")
continue

# Parse CVAT for video 1.1 annotation file.
root = etree.parse(annotation).getroot()
name = os.path.splitext(video.split("/")[-1])[0]
Expand Down
29 changes: 19 additions & 10 deletions src/kabr_tools/miniscene2behavior.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,15 @@ def get_input_clip(cap: cv2.VideoCapture, cfg: CfgNode, keyframe_idx: int) -> li
# https://github.com/facebookresearch/SlowFast/blob/bac7b672f40d44166a84e8c51d1a5ba367ace816/slowfast/visualization/ava_demo_precomputed_boxes.py
seq_length = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
assert keyframe_idx < total_frames, f"keyframe_idx: {keyframe_idx}" \
f" >= total_frames: {total_frames}"
seq = get_sequence(
keyframe_idx,
seq_length // 2,
cfg.DATA.SAMPLING_RATE,
total_frames,
)

clip = []
for frame_idx in seq:
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
Expand Down Expand Up @@ -124,29 +127,34 @@ def annotate_miniscene(cfg: CfgNode, model: torch.nn.Module,

# find all tracks
tracks = []
frames = {}
for track in root.iterfind("track"):
track_id = track.attrib["id"]
tracks.append(track_id)
frames[track_id] = []

# find all frames
# TODO: rewrite - some tracks may have different frames
assert len(tracks) > 0, "No tracks found in track file"
frames = []
for box in track.iterfind("box"):
frames.append(int(box.attrib["frame"]))
# find all frames
for box in track.iterfind("box"):
frames[track_id].append(int(box.attrib["frame"]))

# run model on miniscene
for track in tracks:
video_file = f"{miniscene_path}/{track}.mp4"
cap = cv2.VideoCapture(video_file)
for frame in tqdm(frames, desc=f"{track} frames"):
inputs = get_input_clip(cap, cfg, frame)
index = 0
for frame in tqdm(frames[track], desc=f"{track} frames"):
try:
inputs = get_input_clip(cap, cfg, index)
except AssertionError as e:
print(e)
break
index += 1

if cfg.NUM_GPUS:
# transfer the data to the current GPU device.
if isinstance(inputs, (list,)):
for i in range(len(inputs)):
inputs[i] = inputs[i].cuda(non_blocking=True)
for i, input_clip in enumerate(inputs):
inputs[i] = input_clip.cuda(non_blocking=True)
else:
inputs = inputs.cuda(non_blocking=True)

Expand All @@ -163,6 +171,7 @@ def annotate_miniscene(cfg: CfgNode, model: torch.nn.Module,
if frame % 20 == 0:
pd.DataFrame(label_data).to_csv(
output_path, sep=" ", index=False)
cap.release()
pd.DataFrame(label_data).to_csv(output_path, sep=" ", index=False)


Expand Down
Empty file added tests/__init__.py
Empty file.
49 changes: 49 additions & 0 deletions tests/examples/DETECTOR1/DJI_tracks.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?xml version='1.0' encoding='UTF-8'?>
<annotations>
<track id="0" label="Grevy" source="manual">
<box frame="0" outside="0" occluded="0" keyframe="1" xtl="2011.00" ytl="1883.00" xbr="2133.00" ybr="1978.00" z_order="0"/>
<box frame="1" outside="0" occluded="0" keyframe="0" xtl="2011.00" ytl="1883.00" xbr="2133.00" ybr="1978.00" z_order="0"/>
<box frame="2" outside="0" occluded="0" keyframe="0" xtl="2011.00" ytl="1883.00" xbr="2133.00" ybr="1978.00" z_order="0"/>
<box frame="3" outside="0" occluded="0" keyframe="0" xtl="2011.00" ytl="1883.00" xbr="2133.00" ybr="1978.00" z_order="0"/>
<box frame="4" outside="0" occluded="0" keyframe="0" xtl="2011.00" ytl="1883.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="5" outside="0" occluded="0" keyframe="0" xtl="2011.00" ytl="1883.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="6" outside="0" occluded="0" keyframe="0" xtl="2011.00" ytl="1883.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="7" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1883.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="8" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1883.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="9" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1883.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="10" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1882.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="21" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1882.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="22" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1882.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="23" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1882.00" xbr="2135.00" ybr="1978.00" z_order="0"/>
<box frame="24" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1882.00" xbr="2135.00" ybr="1977.00" z_order="0"/>
<box frame="25" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1882.00" xbr="2135.00" ybr="1977.00" z_order="0"/>
<box frame="26" outside="0" occluded="0" keyframe="0" xtl="2013.00" ytl="1882.00" xbr="2135.00" ybr="1977.00" z_order="0"/>
<box frame="27" outside="0" occluded="0" keyframe="0" xtl="2013.00" ytl="1882.00" xbr="2135.00" ybr="1977.00" z_order="0"/>
<box frame="28" outside="0" occluded="0" keyframe="0" xtl="2013.00" ytl="1882.00" xbr="2135.00" ybr="1977.00" z_order="0"/>
<box frame="29" outside="0" occluded="0" keyframe="0" xtl="2013.00" ytl="1882.00" xbr="2135.00" ybr="1977.00" z_order="0"/>
<box frame="30" outside="0" occluded="0" keyframe="0" xtl="2013.00" ytl="1882.00" xbr="2135.00" ybr="1977.00" z_order="0"/>
</track>
<track id="1" label="Grevy" source="manual">
<box frame="0" outside="0" occluded="0" keyframe="1" xtl="2513.00" ytl="1921.00" xbr="2615.00" ybr="1991.00" z_order="0"/>
<box frame="1" outside="0" occluded="0" keyframe="0" xtl="2513.00" ytl="1921.00" xbr="2615.00" ybr="1991.00" z_order="0"/>
<box frame="2" outside="0" occluded="0" keyframe="0" xtl="2512.00" ytl="1921.00" xbr="2614.00" ybr="1991.00" z_order="0"/>
<box frame="3" outside="0" occluded="0" keyframe="0" xtl="2512.00" ytl="1921.00" xbr="2613.00" ybr="1991.00" z_order="0"/>
<box frame="4" outside="0" occluded="0" keyframe="0" xtl="2511.00" ytl="1921.00" xbr="2613.00" ybr="1991.00" z_order="0"/>
<box frame="5" outside="0" occluded="0" keyframe="0" xtl="2510.00" ytl="1921.00" xbr="2612.00" ybr="1991.00" z_order="0"/>
<box frame="6" outside="0" occluded="0" keyframe="0" xtl="2510.00" ytl="1921.00" xbr="2612.00" ybr="1991.00" z_order="0"/>
<box frame="7" outside="0" occluded="0" keyframe="0" xtl="2509.00" ytl="1921.00" xbr="2611.00" ybr="1991.00" z_order="0"/>
<box frame="8" outside="0" occluded="0" keyframe="0" xtl="2508.00" ytl="1921.00" xbr="2610.00" ybr="1991.00" z_order="0"/>
<box frame="9" outside="0" occluded="0" keyframe="0" xtl="2508.00" ytl="1921.00" xbr="2610.00" ybr="1991.00" z_order="0"/>
<box frame="10" outside="0" occluded="0" keyframe="0" xtl="2507.00" ytl="1921.00" xbr="2609.00" ybr="1991.00" z_order="0"/>
<box frame="11" outside="0" occluded="0" keyframe="0" xtl="2507.00" ytl="1921.00" xbr="2608.00" ybr="1991.00" z_order="0"/>
<box frame="12" outside="0" occluded="0" keyframe="0" xtl="2506.00" ytl="1921.00" xbr="2608.00" ybr="1991.00" z_order="0"/>
<box frame="13" outside="0" occluded="0" keyframe="0" xtl="2505.00" ytl="1921.00" xbr="2607.00" ybr="1991.00" z_order="0"/>
<box frame="14" outside="0" occluded="0" keyframe="0" xtl="2505.00" ytl="1921.00" xbr="2607.00" ybr="1991.00" z_order="0"/>
<box frame="15" outside="0" occluded="0" keyframe="0" xtl="2504.00" ytl="1920.00" xbr="2606.00" ybr="1991.00" z_order="0"/>
<box frame="16" outside="0" occluded="0" keyframe="0" xtl="2503.00" ytl="1920.00" xbr="2605.00" ybr="1991.00" z_order="0"/>
<box frame="17" outside="0" occluded="0" keyframe="0" xtl="2503.00" ytl="1920.00" xbr="2605.00" ybr="1991.00" z_order="0"/>
<box frame="18" outside="0" occluded="0" keyframe="0" xtl="2502.00" ytl="1920.00" xbr="2604.00" ybr="1991.00" z_order="0"/>
<box frame="19" outside="0" occluded="0" keyframe="0" xtl="2502.00" ytl="1920.00" xbr="2603.00" ybr="1990.00" z_order="0"/>
<box frame="20" outside="0" occluded="0" keyframe="0" xtl="2501.00" ytl="1920.00" xbr="2603.00" ybr="1990.00" z_order="0"/>
</track>
</annotations>
49 changes: 49 additions & 0 deletions tests/examples/MINISCENE1/metadata/DJI_tracks.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?xml version='1.0' encoding='UTF-8'?>
<annotations>
<track id="0" label="Grevy" source="manual">
<box frame="0" outside="0" occluded="0" keyframe="1" xtl="2011.00" ytl="1883.00" xbr="2133.00" ybr="1978.00" z_order="0"/>
<box frame="1" outside="0" occluded="0" keyframe="0" xtl="2011.00" ytl="1883.00" xbr="2133.00" ybr="1978.00" z_order="0"/>
<box frame="2" outside="0" occluded="0" keyframe="0" xtl="2011.00" ytl="1883.00" xbr="2133.00" ybr="1978.00" z_order="0"/>
<box frame="3" outside="0" occluded="0" keyframe="0" xtl="2011.00" ytl="1883.00" xbr="2133.00" ybr="1978.00" z_order="0"/>
<box frame="4" outside="0" occluded="0" keyframe="0" xtl="2011.00" ytl="1883.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="5" outside="0" occluded="0" keyframe="0" xtl="2011.00" ytl="1883.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="6" outside="0" occluded="0" keyframe="0" xtl="2011.00" ytl="1883.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="7" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1883.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="8" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1883.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="9" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1883.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="10" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1882.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="11" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1882.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="12" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1882.00" xbr="2134.00" ybr="1978.00" z_order="0"/>
<box frame="13" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1882.00" xbr="2135.00" ybr="1978.00" z_order="0"/>
<box frame="14" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1882.00" xbr="2135.00" ybr="1977.00" z_order="0"/>
<box frame="15" outside="0" occluded="0" keyframe="0" xtl="2012.00" ytl="1882.00" xbr="2135.00" ybr="1977.00" z_order="0"/>
<box frame="16" outside="0" occluded="0" keyframe="0" xtl="2013.00" ytl="1882.00" xbr="2135.00" ybr="1977.00" z_order="0"/>
<box frame="17" outside="0" occluded="0" keyframe="0" xtl="2013.00" ytl="1882.00" xbr="2135.00" ybr="1977.00" z_order="0"/>
<box frame="18" outside="0" occluded="0" keyframe="0" xtl="2013.00" ytl="1882.00" xbr="2135.00" ybr="1977.00" z_order="0"/>
<box frame="19" outside="0" occluded="0" keyframe="0" xtl="2013.00" ytl="1882.00" xbr="2135.00" ybr="1977.00" z_order="0"/>
<box frame="20" outside="0" occluded="0" keyframe="0" xtl="2013.00" ytl="1882.00" xbr="2135.00" ybr="1977.00" z_order="0"/>
</track>
<track id="1" label="Grevy" source="manual">
<box frame="0" outside="0" occluded="0" keyframe="1" xtl="2513.00" ytl="1921.00" xbr="2615.00" ybr="1991.00" z_order="0"/>
<box frame="1" outside="0" occluded="0" keyframe="0" xtl="2513.00" ytl="1921.00" xbr="2615.00" ybr="1991.00" z_order="0"/>
<box frame="2" outside="0" occluded="0" keyframe="0" xtl="2512.00" ytl="1921.00" xbr="2614.00" ybr="1991.00" z_order="0"/>
<box frame="3" outside="0" occluded="0" keyframe="0" xtl="2512.00" ytl="1921.00" xbr="2613.00" ybr="1991.00" z_order="0"/>
<box frame="4" outside="0" occluded="0" keyframe="0" xtl="2511.00" ytl="1921.00" xbr="2613.00" ybr="1991.00" z_order="0"/>
<box frame="5" outside="0" occluded="0" keyframe="0" xtl="2510.00" ytl="1921.00" xbr="2612.00" ybr="1991.00" z_order="0"/>
<box frame="6" outside="0" occluded="0" keyframe="0" xtl="2510.00" ytl="1921.00" xbr="2612.00" ybr="1991.00" z_order="0"/>
<box frame="7" outside="0" occluded="0" keyframe="0" xtl="2509.00" ytl="1921.00" xbr="2611.00" ybr="1991.00" z_order="0"/>
<box frame="8" outside="0" occluded="0" keyframe="0" xtl="2508.00" ytl="1921.00" xbr="2610.00" ybr="1991.00" z_order="0"/>
<box frame="9" outside="0" occluded="0" keyframe="0" xtl="2508.00" ytl="1921.00" xbr="2610.00" ybr="1991.00" z_order="0"/>
<box frame="10" outside="0" occluded="0" keyframe="0" xtl="2507.00" ytl="1921.00" xbr="2609.00" ybr="1991.00" z_order="0"/>
<box frame="11" outside="0" occluded="0" keyframe="0" xtl="2507.00" ytl="1921.00" xbr="2608.00" ybr="1991.00" z_order="0"/>
<box frame="12" outside="0" occluded="0" keyframe="0" xtl="2506.00" ytl="1921.00" xbr="2608.00" ybr="1991.00" z_order="0"/>
<box frame="13" outside="0" occluded="0" keyframe="0" xtl="2505.00" ytl="1921.00" xbr="2607.00" ybr="1991.00" z_order="0"/>
<box frame="14" outside="0" occluded="0" keyframe="0" xtl="2505.00" ytl="1921.00" xbr="2607.00" ybr="1991.00" z_order="0"/>
<box frame="15" outside="0" occluded="0" keyframe="0" xtl="2504.00" ytl="1920.00" xbr="2606.00" ybr="1991.00" z_order="0"/>
<box frame="16" outside="0" occluded="0" keyframe="0" xtl="2503.00" ytl="1920.00" xbr="2605.00" ybr="1991.00" z_order="0"/>
<box frame="17" outside="0" occluded="0" keyframe="0" xtl="2503.00" ytl="1920.00" xbr="2605.00" ybr="1991.00" z_order="0"/>
<box frame="18" outside="0" occluded="0" keyframe="0" xtl="2502.00" ytl="1920.00" xbr="2604.00" ybr="1991.00" z_order="0"/>
<box frame="19" outside="0" occluded="0" keyframe="0" xtl="2502.00" ytl="1920.00" xbr="2603.00" ybr="1990.00" z_order="0"/>
<box frame="20" outside="0" occluded="0" keyframe="0" xtl="2501.00" ytl="1920.00" xbr="2603.00" ybr="1990.00" z_order="0"/>
</track>
</annotations>
Loading

0 comments on commit 8fe4de0

Please sign in to comment.