Skip to content

Classification

Bases: BaseModel

Use a foundation classification model to auto-label data.

Source code in autodistill/classification/classification_base_model.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
@dataclass
class ClassificationBaseModel(BaseModel):
    """
    Use a foundation classification model to auto-label data.

    Subclasses implement `predict`; `label` then applies it to every image
    in a folder and writes the result out as train/test/valid splits.
    """

    # Ontology whose classes() become the class list of the labeled dataset.
    ontology: CaptionOntology

    @abstractmethod
    def predict(self, input: str) -> sv.Classifications:
        """
        Run inference on the model.

        Args:
            input: Path of the image to classify (`label` passes the file
                path, not the decoded image).

        Returns:
            The classifications predicted for that image.
        """
        pass

    def label(
        self, input_folder: str, extension: str = ".jpg", output_folder: str = None
    ) -> sv.ClassificationDataset:
        """
        Label a dataset and save it in a classification folder structure.

        Every file in `input_folder` whose name ends with `extension` is read,
        classified with `predict`, and collected into a dataset. The dataset
        is split 70% train, with the remainder halved into test and valid,
        and each split is written below `output_folder`.

        Args:
            input_folder: Directory containing the images to label
                (not searched recursively).
            extension: File name suffix, including the dot, of the images
                to pick up.
            output_folder: Directory to write the splits to. Defaults to
                `input_folder + "_labeled"`.

        Returns:
            The full (unsplit) labeled dataset.
        """
        if output_folder is None:
            output_folder = input_folder + "_labeled"

        os.makedirs(output_folder, exist_ok=True)

        images_map = {}
        detections_map = {}

        files = glob.glob(os.path.join(input_folder, "*" + extension))
        progress_bar = tqdm(files, desc="Labeling images")
        # iterate through images in input_folder
        for f_path in progress_bar:
            progress_bar.set_description(desc=f"Labeling {f_path}", refresh=True)
            image = cv2.imread(f_path)

            # cv2.imread signals failure by returning None instead of raising;
            # skip such files so one bad image does not abort the whole run.
            if image is None:
                print(f"Skipping {f_path}: could not be read as an image.")
                continue

            f_path_short = os.path.basename(f_path)
            images_map[f_path_short] = image
            detections_map[f_path_short] = self.predict(f_path)

        dataset = sv.ClassificationDataset(
            self.ontology.classes(), images_map, detections_map
        )

        # 70% train, then the remaining 30% is halved into test and valid.
        train_cs, test_cs = dataset.split(
            split_ratio=0.7, random_state=None, shuffle=True
        )
        test_cs, valid_cs = test_cs.split(
            split_ratio=0.5, random_state=None, shuffle=True
        )

        train_cs.as_folder_structure(
            root_directory_path=os.path.join(output_folder, "train")
        )

        test_cs.as_folder_structure(
            root_directory_path=os.path.join(output_folder, "test")
        )

        valid_cs.as_folder_structure(
            root_directory_path=os.path.join(output_folder, "valid")
        )

        print("Labeled dataset created - ready for distillation.")
        return dataset

label(input_folder, extension='.jpg', output_folder=None)

Label every image in a folder with the model's predictions and save the result as train, test, and valid splits in a classification folder structure.

Source code in autodistill/classification/classification_base_model.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
def label(
    self, input_folder: str, extension: str = ".jpg", output_folder: str = None
) -> sv.ClassificationDataset:
    """
    Label a dataset and save it in a classification folder structure.

    Every file in `input_folder` whose name ends with `extension` is read,
    classified with `self.predict`, and collected into a dataset. The dataset
    is split 70% train, with the remainder halved into test and valid, and
    each split is written below `output_folder`.

    Args:
        input_folder: Directory containing the images to label
            (not searched recursively).
        extension: File name suffix, including the dot, of the images
            to pick up.
        output_folder: Directory to write the splits to. Defaults to
            `input_folder + "_labeled"`.

    Returns:
        The full (unsplit) labeled dataset.
    """
    if output_folder is None:
        output_folder = input_folder + "_labeled"

    os.makedirs(output_folder, exist_ok=True)

    images_map = {}
    detections_map = {}

    files = glob.glob(os.path.join(input_folder, "*" + extension))
    progress_bar = tqdm(files, desc="Labeling images")
    # iterate through images in input_folder
    for f_path in progress_bar:
        progress_bar.set_description(desc=f"Labeling {f_path}", refresh=True)
        image = cv2.imread(f_path)

        # cv2.imread signals failure by returning None instead of raising;
        # skip such files so one bad image does not abort the whole run.
        if image is None:
            print(f"Skipping {f_path}: could not be read as an image.")
            continue

        f_path_short = os.path.basename(f_path)
        images_map[f_path_short] = image
        detections_map[f_path_short] = self.predict(f_path)

    dataset = sv.ClassificationDataset(
        self.ontology.classes(), images_map, detections_map
    )

    # 70% train, then the remaining 30% is halved into test and valid.
    train_cs, test_cs = dataset.split(
        split_ratio=0.7, random_state=None, shuffle=True
    )
    test_cs, valid_cs = test_cs.split(
        split_ratio=0.5, random_state=None, shuffle=True
    )

    train_cs.as_folder_structure(
        root_directory_path=os.path.join(output_folder, "train")
    )

    test_cs.as_folder_structure(
        root_directory_path=os.path.join(output_folder, "test")
    )

    valid_cs.as_folder_structure(
        root_directory_path=os.path.join(output_folder, "valid")
    )

    print("Labeled dataset created - ready for distillation.")
    return dataset

predict(input) abstractmethod

Run inference on the model for a single input. This method is abstract: each concrete classification model must implement it.

Source code in autodistill/classification/classification_base_model.py
22
23
24
25
26
27
@abstractmethod
def predict(self, input: str) -> sv.Classifications:
    """
    Classify a single input with the underlying model.

    Abstract hook: it has no implementation here and is meant to be
    overridden by concrete models.

    Args:
        input: The input to classify, given as a string.

    Returns:
        The classifications the model assigns to the input.
    """
    ...