CategoryManager

Source code in LabeLMaker/utils/category.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
class CategoryManager:
    """Collect category definitions from the UI and package them into categorization requests."""

    @staticmethod
    def define_categories(ui_helper, key_prefix, unique_values_str=None, get_file_examples=False):
        """
        Render the category-definition widgets and collect what was entered.

        Args:
            ui_helper: a helper object wrapping Streamlit calls so that category logic stays
                separate. This method calls its ``markdown``, ``number_input``, ``expander``,
                ``text_input`` and ``file_uploader`` methods.
            key_prefix: string prepended to every widget key created here.
            unique_values_str: optional comma-separated labels used to pre-fill one category
                per value. Blank entries and the values "nan"/"none" (case-insensitive) are
                skipped. When empty or None, the number of categories is asked for instead.
            get_file_examples: when true, each category also shows a file uploader and the
                text of every uploaded file becomes an example for that category.

        Returns:
            A tuple ``(categories_dict, all_examples)``: a dict mapping each lower-cased label
            to its description ("" when none was given), and a list of ``Example`` objects
            (empty when no files were uploaded).
        """
        ui_helper.markdown("---")
        if unique_values_str:
            # "" is skipped as well, so "a,,b" or a trailing comma does not create a
            # category with an empty label.
            skipped = ("", "nan", "none")
            stripped_values = (val.strip() for val in unique_values_str.split(","))
            unique_values = [val for val in stripped_values if val.lower() not in skipped]
            num_categories = len(unique_values)
        else:
            num_categories = int(
                ui_helper.number_input(
                    "Enter the number of categories",
                    min_value=2,
                    value=2,
                    step=1,
                    key=f"{key_prefix}_categories",
                )
            )
            unique_values = None

        categories_dict = {}
        all_examples = []

        for i in range(num_categories):
            position = i + 1  # widgets are labelled starting from 1
            with ui_helper.expander(f"Category {position}", expanded=True):
                label_kwargs = {"key": f"{key_prefix}_text_input_{position}"}
                if unique_values:
                    label_kwargs["value"] = unique_values[i].title()
                category_value = ui_helper.text_input(
                    f"Enter label for category {position}", **label_kwargs
                )
                category_description = ui_helper.text_input(
                    f"Enter description for category {position} (optional but recommended)",
                    "",
                    key=f"{key_prefix}_desc_input_{position}",
                )
                categories_dict[category_value.lower()] = category_description or ""

                if not get_file_examples:
                    continue

                uploaded_files = ui_helper.file_uploader(
                    "Upload example files for this category",
                    type=["docx", "pdf"],
                    accept_multiple_files=True,
                    # Namespaced with key_prefix like every other widget key in this method,
                    # so two calls with different prefixes never share an uploader key.
                    key=f"{key_prefix}_example_{i}",
                )
                if not uploaded_files:
                    continue

                _, texts = FileManager().process_multiple_files(uploaded_files)
                if texts:
                    # NOTE(review): examples carry the label as typed, while categories_dict
                    # keys are lower-cased -- confirm downstream matching tolerates this.
                    all_examples.extend(
                        Example(text_with_label=text, label=category_value) for text in texts
                    )

        return categories_dict, all_examples

    @staticmethod
    def create_request(index_list, df_text, categories_dict, examples=None):
        """
        Build a ``CategorizationRequest`` from ids, texts, categories and optional examples.

        Args:
            index_list: passed through as the request's ``unique_ids``.
            df_text: passed through as the request's ``text_to_label``.
            categories_dict: mapping of category name to description; each item becomes a
                ``Categories`` object.
            examples: optional iterable whose entries are either ``Example`` objects or
                two-item lists/tuples of ``(text, label)``. A falsy value is passed through
                unchanged.

        Returns:
            The constructed ``CategorizationRequest``.

        Raises:
            ValueError: if an entry of ``examples`` is neither an ``Example`` nor a two-item
                list/tuple.
        """
        categories = [
            Categories(name=name, description=desc) for name, desc in categories_dict.items()
        ]

        # Convert examples from tuple to Example objects if necessary.
        if examples:
            new_examples = []
            for ex in examples:
                # If ex is already an Example instance, leave it as is.
                if isinstance(ex, Example):
                    new_examples.append(ex)
                # Otherwise, assume it's a tuple (text, label) and convert it.
                elif isinstance(ex, (list, tuple)) and len(ex) == 2:
                    new_examples.append(Example(text_with_label=ex[0], label=ex[1]))
                else:
                    raise ValueError(
                        "Example must be an Example object or a tuple of (text, label)."
                    )
            examples = new_examples

        cat_req = CategorizationRequest(
            unique_ids=index_list, text_to_label=df_text, categories=categories, examples=examples
        )
        return cat_req

define_categories(ui_helper, key_prefix, unique_values_str=None, get_file_examples=False) staticmethod

ui_helper: a helper object wrapping Streamlit calls so that category logic stays separate.

Source code in LabeLMaker/utils/category.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
@staticmethod
def define_categories(ui_helper, key_prefix, unique_values_str=None, get_file_examples=False):
    """
    Render the category-definition widgets and collect what was entered.

    Args:
        ui_helper: a helper object wrapping Streamlit calls so that category logic stays
            separate. This function calls its ``markdown``, ``number_input``, ``expander``,
            ``text_input`` and ``file_uploader`` methods.
        key_prefix: string prepended to every widget key created here.
        unique_values_str: optional comma-separated labels used to pre-fill one category per
            value. Blank entries and the values "nan"/"none" (case-insensitive) are skipped.
            When empty or None, the number of categories is asked for instead.
        get_file_examples: when true, each category also shows a file uploader and the text
            of every uploaded file becomes an example for that category.

    Returns:
        A tuple ``(categories_dict, all_examples)``: a dict mapping each lower-cased label to
        its description ("" when none was given), and a list of ``Example`` objects (empty
        when no files were uploaded).
    """
    ui_helper.markdown("---")
    if unique_values_str:
        # "" is skipped as well, so "a,,b" or a trailing comma does not create a category
        # with an empty label.
        skipped = ("", "nan", "none")
        stripped_values = (val.strip() for val in unique_values_str.split(","))
        unique_values = [val for val in stripped_values if val.lower() not in skipped]
        num_categories = len(unique_values)
    else:
        num_categories = int(
            ui_helper.number_input(
                "Enter the number of categories",
                min_value=2,
                value=2,
                step=1,
                key=f"{key_prefix}_categories",
            )
        )
        unique_values = None

    categories_dict = {}
    all_examples = []

    for i in range(num_categories):
        position = i + 1  # widgets are labelled starting from 1
        with ui_helper.expander(f"Category {position}", expanded=True):
            label_kwargs = {"key": f"{key_prefix}_text_input_{position}"}
            if unique_values:
                label_kwargs["value"] = unique_values[i].title()
            category_value = ui_helper.text_input(
                f"Enter label for category {position}", **label_kwargs
            )
            category_description = ui_helper.text_input(
                f"Enter description for category {position} (optional but recommended)",
                "",
                key=f"{key_prefix}_desc_input_{position}",
            )
            categories_dict[category_value.lower()] = category_description or ""

            if not get_file_examples:
                continue

            uploaded_files = ui_helper.file_uploader(
                "Upload example files for this category",
                type=["docx", "pdf"],
                accept_multiple_files=True,
                # Namespaced with key_prefix like every other widget key in this function,
                # so two calls with different prefixes never share an uploader key.
                key=f"{key_prefix}_example_{i}",
            )
            if not uploaded_files:
                continue

            _, texts = FileManager().process_multiple_files(uploaded_files)
            if texts:
                # NOTE(review): examples carry the label as typed, while categories_dict keys
                # are lower-cased -- confirm downstream matching tolerates this.
                all_examples.extend(
                    Example(text_with_label=text, label=category_value) for text in texts
                )

    return categories_dict, all_examples