Skip to content

Classification

Bases: BaseModel

Use a foundation classification model to auto-label data.

Source code in autodistill/classification/classification_base_model.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
@dataclass
class ClassificationBaseModel(BaseModel):
    """
    Use a foundation classification model to auto-label data.

    Subclasses implement `predict`; `label` runs it over every matching
    image in a folder and writes the result out as train/test/valid splits.
    """

    # Ontology whose `classes()` supply the class list of the labeled dataset.
    ontology: CaptionOntology

    @abstractmethod
    def predict(self, input: str) -> sv.Classifications:
        """
        Run inference on the model.

        Args:
            input: The item to classify. `label` passes the path of an
                image file here.

        Returns:
            The classifications predicted for `input`.
        """

    def label(
        self,
        input_folder: str,
        extension: str = ".jpg",
        output_folder: str | None = None,
    ) -> sv.ClassificationDataset:
        """
        Label a dataset and save it in a classification folder structure.

        Every file directly inside `input_folder` whose name ends with
        `extension` is passed to `predict`. The predictions are split into
        train/test/valid subsets that are written below `output_folder`.

        Args:
            input_folder: Folder containing the images to label.
            extension: File-name suffix used to select images.
            output_folder: Folder to write the labeled dataset to. Defaults
                to `input_folder` with "_labeled" appended (trailing path
                separators on `input_folder` are ignored).

        Returns:
            The complete (unsplit) labeled dataset.
        """
        if output_folder is None:
            # Strip trailing separators so "images/" maps to the sibling
            # folder "images_labeled" rather than the nested, surprising
            # "images/_labeled". A bare root ("/") is left untouched.
            separators = os.sep + (os.altsep or "")
            base_folder = input_folder.rstrip(separators) or input_folder
            output_folder = base_folder + "_labeled"

        os.makedirs(output_folder, exist_ok=True)

        image_paths = glob.glob(input_folder + "/*" + extension)
        detections_map = {}

        progress_bar = tqdm(image_paths, desc="Labeling images")
        for f_path in progress_bar:
            progress_bar.set_description(desc=f"Labeling {f_path}", refresh=True)
            detections_map[f_path] = self.predict(f_path)

        dataset = sv.ClassificationDataset(
            self.ontology.classes(), image_paths, detections_map
        )

        # 70% train; the remaining 30% is halved into test and valid,
        # giving a 70/15/15 split overall.
        train_cs, test_cs = dataset.split(
            split_ratio=0.7, random_state=None, shuffle=True
        )
        test_cs, valid_cs = test_cs.split(
            split_ratio=0.5, random_state=None, shuffle=True
        )

        train_cs.as_folder_structure(root_directory_path=output_folder + "/train")
        test_cs.as_folder_structure(root_directory_path=output_folder + "/test")
        valid_cs.as_folder_structure(root_directory_path=output_folder + "/valid")

        print("Labeled dataset created - ready for distillation.")
        return dataset

label(input_folder, extension='.jpg', output_folder=None)

Label a dataset and save it in a classification folder structure.

Source code in autodistill/classification/classification_base_model.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def label(
    self,
    input_folder: str,
    extension: str = ".jpg",
    output_folder: str | None = None,
) -> sv.ClassificationDataset:
    """
    Label a dataset and save it in a classification folder structure.

    Every file directly inside `input_folder` whose name ends with
    `extension` is passed to `self.predict`. The predictions are split into
    train/test/valid subsets that are written below `output_folder`.

    Args:
        input_folder: Folder containing the images to label.
        extension: File-name suffix used to select images.
        output_folder: Folder to write the labeled dataset to. Defaults to
            `input_folder` with "_labeled" appended (trailing path
            separators on `input_folder` are ignored).

    Returns:
        The complete (unsplit) labeled dataset.
    """
    if output_folder is None:
        # Strip trailing separators so "images/" maps to the sibling folder
        # "images_labeled" rather than the nested, surprising
        # "images/_labeled". A bare root ("/") is left untouched.
        separators = os.sep + (os.altsep or "")
        base_folder = input_folder.rstrip(separators) or input_folder
        output_folder = base_folder + "_labeled"

    os.makedirs(output_folder, exist_ok=True)

    image_paths = glob.glob(input_folder + "/*" + extension)
    detections_map = {}

    progress_bar = tqdm(image_paths, desc="Labeling images")
    for f_path in progress_bar:
        progress_bar.set_description(desc=f"Labeling {f_path}", refresh=True)
        detections_map[f_path] = self.predict(f_path)

    dataset = sv.ClassificationDataset(
        self.ontology.classes(), image_paths, detections_map
    )

    # 70% train; the remaining 30% is halved into test and valid,
    # giving a 70/15/15 split overall.
    train_cs, test_cs = dataset.split(
        split_ratio=0.7, random_state=None, shuffle=True
    )
    test_cs, valid_cs = test_cs.split(
        split_ratio=0.5, random_state=None, shuffle=True
    )

    train_cs.as_folder_structure(root_directory_path=output_folder + "/train")
    test_cs.as_folder_structure(root_directory_path=output_folder + "/test")
    valid_cs.as_folder_structure(root_directory_path=output_folder + "/valid")

    print("Labeled dataset created - ready for distillation.")
    return dataset

predict(input) abstractmethod

Run inference on the model.

Source code in autodistill/classification/classification_base_model.py
21
22
23
24
25
26
@abstractmethod
def predict(self, input: str) -> sv.Classifications:
    """
    Run inference on the model.

    Abstract hook with no behaviour of its own: concrete models override it
    to classify `input` and return the resulting classifications. `label`
    calls it once per image, passing the image's file path.
    """