create_base64_file_validator(*allowed_mime_types)

Creates a function that validates the MIME type of a base64-encoded file.

Parameters:
  • allowed_mime_types (tuple, default: () ) –

    A tuple of strings representing the allowed MIME types.

Returns:
  • Callable[[str, ValidationInfo], str]: A function to validate base64-encoded files.

Source code in llm_utils/aiweb_common/file_operations/file_handling.py
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def create_base64_file_validator(*allowed_mime_types):
    """
    Creates a function that validates the MIME type of a base64-encoded file.

    Args:
        *allowed_mime_types (str): The MIME types that are accepted.

    Returns:
        Callable: A validator taking ``(cls, v, info)`` that returns ``v`` unchanged
        when it decodes as base64 and its detected MIME type is allowed.
    """

    def validate_base64_encoded_file(cls, v, info):
        """
        Validate the MIME type of a base64-encoded file.

        ``cls`` and ``info`` are accepted but not used by this function.
        NOTE(review): the (cls, v, info) shape looks like a pydantic field
        validator signature - confirm against the callers.

        Returns:
            The original, still-encoded value ``v``.

        Raises:
            ValueError: If ``v`` is not valid base64, or if the detected MIME type
                is not one of the allowed types.
        """
        try:
            # validate=True rejects characters outside the base64 alphabet
            # instead of silently discarding them.
            file_bytes = base64.b64decode(v, validate=True)
        except ValueError as exc:
            # Chain the original error so the underlying decode failure stays
            # visible in tracebacks.
            raise ValueError("Invalid base64 encoding") from exc

        # Use python-magic to detect the MIME type from the decoded bytes
        mime = magic.Magic(mime=True)
        mime_type = mime.from_buffer(file_bytes)

        if mime_type not in allowed_mime_types:
            allowed_types_formatted = ", ".join(allowed_mime_types)
            raise ValueError(
                f"Incorrect file type. Required types: {allowed_types_formatted}"
            )

        return v

    return validate_base64_encoded_file

create_file_validator(*allowed_mime_types)

Creates a dependency function that validates the MIME type of an uploaded file.

Parameters:
  • allowed_mime_types (tuple, default: () ) –

    A tuple of strings representing the allowed MIME types.

Returns:
  • Callable[[UploadFile], UploadFile]: A function that checks if the uploaded file's MIME type is in the allowed MIME types.

Examples: validate_docx_file = create_file_validator("application/vnd.openxmlformats-officedocument.wordprocessingml.document") validate_pdf_file = create_file_validator("application/pdf", "application/x-pdf")

Source code in llm_utils/aiweb_common/file_operations/file_handling.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def create_file_validator(*allowed_mime_types):
    """
    Build a dependency function that checks an uploaded file's MIME type.

    Args:
        *allowed_mime_types (str): The MIME types that are accepted.

    Returns:
        Callable[[UploadFile], UploadFile]: A function that returns the upload
        unchanged when its ``content_type`` is one of the allowed MIME types.

    Examples:
    validate_docx_file = create_file_validator("application/vnd.openxmlformats-officedocument.wordprocessingml.document")
    validate_pdf_file = create_file_validator("application/pdf", "application/x-pdf")
    """

    def validate_file(file: UploadFile = File(...)):
        """
        Check the upload's ``content_type`` against the allowed MIME types.

        Only the ``content_type`` attribute is compared; the file content itself
        is not read here.

        Raises:
            HTTPException: With status 415 when the type is not allowed.
        """
        # Guard clause: accepted uploads are handed back untouched.
        if file.content_type in allowed_mime_types:
            return file

        raise HTTPException(
            status_code=415,
            detail="Incorrect file type. Required type: "
            + ", ".join(allowed_mime_types),
        )

    return validate_file

file_to_base64(filepath)

Converts a file to a base64-encoded string.

Source code in llm_utils/aiweb_common/file_operations/file_handling.py
12
13
14
15
def file_to_base64(filepath):
    """Read the file at ``filepath`` in binary mode and return its base64 text.

    Args:
        filepath: Path of the file to read.

    Returns:
        str: The base64 encoding of the file's bytes.
    """
    with open(filepath, "rb") as source:
        raw_bytes = source.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

ingest_docx(file) async

The function ingest_docx reads the content of a DOCX file as bytes, writes it to a temporary file, and then loads the document from the temporary file.

:param file: A file-like object that supports asynchronous reading. When `await file.read()` is called, it returns the content of the file as bytes. This content is then written to a temporary file with a `.docx` suffix. :return: The function `ingest_docx` returns a tuple containing two values: 1. The name of the temporary file where the DOCX content was written. 2. An instance of the `Document` class representing the loaded document from the temporary file.

Source code in llm_utils/aiweb_common/file_operations/file_handling.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
async def ingest_docx(file):
    """
    Read a DOCX upload asynchronously and load it through `ingest_docx_bytes`.

    The content is awaited before any temporary file is created, so a failed
    read does not leave an empty temporary file behind.

    :param file: An object whose awaitable `read()` returns the DOCX content as bytes.
    :return: The tuple produced by `ingest_docx_bytes`:
    1. The name of the temporary file where the DOCX content was written.
    2. An instance of the `Document` class loaded from that temporary file.
    """
    content = await file.read()  # Read file content as bytes

    # Writing and loading happen synchronously here, before returning, so the
    # caller receives an already-loaded document.
    return ingest_docx_bytes(content)

ingest_docx_bytes(content)

The function ingest_docx_bytes reads the content of a DOCX file from bytes, saves it to a temporary file, and then loads the document using the Document class.

:param content: The content of a DOCX file as bytes. It is written to a temporary `.docx` file, which is then loaded with the Document class. :return: The ingest_docx_bytes function returns a tuple containing two values: 1. The name of the temporary file where the content was written (temp_doc.name) 2. The Document object representing the content loaded from the temporary file

Source code in llm_utils/aiweb_common/file_operations/file_handling.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def ingest_docx_bytes(content):
    """
    Write DOCX bytes to a temporary `.docx` file and load it with `Document`.

    The temporary file is created with `delete=False`, so on success it stays on
    disk after this function returns. If writing the content or loading the
    document fails, the file is removed here before the exception propagates.

    :param content: The content of a DOCX file as bytes.
    :return: A tuple containing two values:
    1. The name of the temporary file where the content was written.
    2. The `Document` object loaded from that temporary file.
    """
    import os  # local import: only needed to clean up on the failure path

    temp_doc = tempfile.NamedTemporaryFile(delete=False, suffix=".docx")
    try:
        # Leaving the `with` block flushes and closes the handle, so the file is
        # complete on disk (and not held open) before it is reopened by name.
        with temp_doc:
            temp_doc.write(content)

        # Load the document from the temporary file
        return temp_doc.name, Document(temp_doc.name)
    except Exception:
        # delete=False means nothing else will remove the file on failure.
        os.unlink(temp_doc.name)
        raise

markdown_to_docx_temporary_file(content, template_location=None)

The function markdown_to_docx_temporary_file converts Markdown content to a DOCX file and returns the temporary file path.

:param content: The Markdown text to convert to a DOCX file. It is passed to the convert_markdown_docx function. :param template_location: Optional template location, passed through to convert_markdown_docx (defaults to None). :return: The file path of the temporary .docx file that is created after converting the provided content (in markdown format) to a .docx file using the specified template location.

Source code in llm_utils/aiweb_common/file_operations/file_handling.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def markdown_to_docx_temporary_file(content, template_location=None):
    """
    Convert Markdown content to DOCX data and save it in a temporary `.docx` file.

    The temporary file is created with `delete=False`, so it remains on disk after
    this function returns.

    :param content: The Markdown content, passed to `convert_markdown_docx`.
    :param template_location: Optional template location, passed through to
    `convert_markdown_docx`. Defaults to None.
    :return: The path of the temporary `.docx` file that the converted data was
    written to.
    """
    # Convert first, so no temporary file is created if the conversion fails.
    rendered = convert_markdown_docx(content, template_location)

    with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as handle:
        handle.write(rendered)

    return handle.name

validate_date(date_str=Query(..., description='The start date in YYYY-MM-DD format'))

Custom dependency that validates and parses a date string.

Args: date_str (str): A date string in the YYYY-MM-DD format.

Returns: datetime: The parsed datetime object.

Raises: HTTPException: If the date string is not in the correct format.

Source code in llm_utils/aiweb_common/file_operations/file_handling.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
def validate_date(
    date_str: str = Query(..., description="The start date in YYYY-MM-DD format")
) -> datetime:
    """
    Custom dependency that parses a YYYY-MM-DD date string.

    Args:
    date_str (str): A date string in the YYYY-MM-DD format.

    Returns:
    datetime: The datetime parsed from the string.

    Raises:
    HTTPException: With status 400 if the string does not match the format.
    """
    try:
        parsed = datetime.strptime(date_str, "%Y-%m-%d")
    except ValueError as exc:
        # Report the parse failure as a 400, keeping the original error chained.
        raise HTTPException(
            status_code=400, detail="start_date must be in YYYY-MM-DD format"
        ) from exc
    return parsed