WARNING: THIS SITE IS A MIRROR OF GITHUB.COM / IT CANNOT LOG IN OR REGISTER ACCOUNTS / THE CONTENTS ARE PROVIDED AS-IS / THIS SITE ASSUMES NO RESPONSIBILITY FOR ANY DISPLAYED CONTENT OR LINKS / IF YOU FIND SOMETHING THAT MAY NOT BE GOOD FOR EVERYONE, CONTACT THE ADMIN AT ilovescratch@foxmail.com
Skip to content

Commit 5091190

Browse files
image url to base64
1 parent 94f3122 commit 5091190

File tree

1 file changed

+25
-2
lines changed

1 file changed

+25
-2
lines changed

code/backend/batch/utilities/helpers/embedders/push_embedder.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import logging
44
from typing import List
55
from urllib.parse import urlparse
6-
6+
import urllib.request
77
from ...helpers.llm_helper import LLMHelper
88
from ...helpers.env_helper import EnvHelper
99
from ..azure_computer_vision_client import AzureComputerVisionClient
@@ -18,6 +18,8 @@
1818
from ..document_loading_helper import DocumentLoading
1919
from ..document_chunking_helper import DocumentChunking
2020
from ...common.source_document import SourceDocument
21+
import base64
22+
from mimetypes import guess_type
2123

2224
logger = logging.getLogger(__name__)
2325

@@ -101,6 +103,27 @@ def __embed(
101103
else:
102104
logger.warning("No documents to upload.")
103105

106+
def __local_image_to_data_url(self, image_path, timeout=30):
    """Convert a local image file or an HTTP(S) URL to a base64 data URL.

    Args:
        image_path: Either a filesystem path or an ``http``/``https`` URL
            pointing at the image bytes.
        timeout: Seconds to wait on the network when ``image_path`` is a
            URL. Ignored for local files.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        urllib.error.URLError: If the URL cannot be fetched (including a
            timeout while connecting).
        OSError: If the local file cannot be opened or read.
    """
    # First guess: derive the MIME type from the file extension.
    mime_type, _ = guess_type(image_path)

    # Anything that is not http/https is treated as a local file path.
    parsed_url = urlparse(image_path)
    if parsed_url.scheme in ("http", "https"):
        logger.info(f"Downloading image from URL: {image_path}")
        # A timeout keeps a stalled server from blocking the caller forever.
        with urllib.request.urlopen(image_path, timeout=timeout) as response:
            image_data = response.read()
            if mime_type is None:
                # Extensionless URLs give guess_type nothing to work with;
                # fall back to what the server declared, dropping any
                # parameters such as "; charset=...".
                content_type = response.headers.get("Content-Type") or ""
                mime_type = content_type.split(";", 1)[0].strip() or None
    else:
        with open(image_path, "rb") as image_file:
            image_data = image_file.read()

    if mime_type is None:
        mime_type = "application/octet-stream"

    base64_encoded_data = base64.b64encode(image_data).decode("utf-8")
    return f"data:{mime_type};base64,{base64_encoded_data}"
126+
104127
def __generate_image_caption(self, source_url):
105128
logger.info(f"Generating image caption for URL: {source_url}")
106129
model = self.env_helper.AZURE_OPENAI_VISION_MODEL
@@ -119,7 +142,7 @@ def __generate_image_caption(self, source_url):
119142
"text": "Describe this image in detail. Limit the response to 500 words.",
120143
"type": "text",
121144
},
122-
{"image_url": {"url": source_url}, "type": "image_url"},
145+
{"image_url": {"url": self.__local_image_to_data_url(source_url)}, "type": "image_url"},
123146
],
124147
},
125148
]

0 commit comments

Comments
 (0)