import base64
import io
from functools import lru_cache
from typing import Dict, List, Optional

from dashscope import MultiModalEmbedding
from langchain_openai import OpenAIEmbeddings
from langfuse import observe
from PIL import Image

from src.conf.settings import model_settings
from src.utils.file.image_util import image_util


class Embedding:
    """Utility wrapper around the text and multimodal embedding models.

    Text embeddings go through ``langchain_openai.OpenAIEmbeddings`` using the
    configured ``base_url``/``api_key``; image and text+image embeddings go
    through ``dashscope.MultiModalEmbedding`` using the DashScope API key.
    """

    def __init__(
        self,
        model_name: Optional[str] = None,
        api_key: Optional[str] = None,
    ):
        """Initialise the embedding helper.

        Args:
            model_name: Text embedding model name. When ``None`` (or empty)
                the value from ``model_settings`` is used.
            api_key: API key for the text embedding endpoint. When ``None``
                (or empty) the value from ``model_settings`` is used.
        """
        # Snapshot the model configuration at construction time.
        self.model_provider = model_settings.model_provider
        self.embedding_model_name = model_name or model_settings.embedding_model_name
        self.multi_embedding_model_name = model_settings.multimodal_embedding_model_name
        self.base_url = model_settings.base_url
        self.api_key = api_key or model_settings.api_key
        self.dashscope_api_key = model_settings.dashscope_api_key

    def _build_text_embedder(self) -> OpenAIEmbeddings:
        """Create the OpenAI-compatible embeddings client from instance config."""
        return OpenAIEmbeddings(
            model=self.embedding_model_name,
            base_url=self.base_url,
            api_key=self.api_key,
        )

    @staticmethod
    def _image_to_data_uri(image: Image.Image) -> str:
        """Serialise a PIL image into a base64 ``data:`` URI."""
        buffer = io.BytesIO()
        image.save(buffer, format="PNG")
        buffer.seek(0)
        # Shrink the byte stream before encoding.
        # NOTE(review): the URI below always declares image/png; confirm that
        # image_util._compress_image_to_bytes keeps the PNG format.
        compressed_bytes = image_util._compress_image_to_bytes(buffer)
        image_base64 = base64.b64encode(compressed_bytes).decode("utf-8")
        return f"data:image/png;base64,{image_base64}"

    def _call_multimodal(self, item: List[Dict[str, str]]) -> List[float]:
        """Send ``item`` to the multimodal model and return the first embedding.

        Raises:
            Exception: If the service answers with a non-200 status code.
        """
        response = MultiModalEmbedding.call(
            model=self.multi_embedding_model_name,
            api_key=self.dashscope_api_key,
            input=item,
        )
        if response.status_code != 200:
            raise Exception(f"Error: {response.message}")
        return response.output["embeddings"][0]["embedding"]

    @observe(name="text_embedding", as_type="embedding")
    def get_text_embedding(self, text: str) -> List[float]:
        """Return the embedding vector of a single text.

        Args:
            text: The text to embed.

        Returns:
            The embedding vector of ``text``.

        Raises:
            Exception: If the embedding request fails; the original error is
                attached as ``__cause__``.
        """
        try:
            return self._build_text_embedder().embed_query(text)
        except Exception as e:
            raise Exception(f"文本embedding生成失败: {str(e)}") from e

    @observe(name="texts_embedding", as_type="embedding")
    def get_texts_embedding(self, texts: List[str]) -> List[List[float]]:
        """Return the embedding vectors of several texts.

        Args:
            texts: The texts to embed.

        Returns:
            One embedding vector per input text, as returned by
            ``OpenAIEmbeddings.embed_documents``.

        Raises:
            Exception: If the embedding request fails; the original error is
                attached as ``__cause__``.
        """
        try:
            return self._build_text_embedder().embed_documents(texts)
        except Exception as e:
            raise Exception(f"多个文本embedding生成失败: {str(e)}") from e

    @observe(name="image_embedding", as_type="embedding")
    def get_image_embedding(self, image: Image.Image) -> List[float]:
        """Return the embedding vector of an image.

        Args:
            image: The PIL image to embed.

        Returns:
            The embedding vector of ``image``.

        Raises:
            Exception: If encoding the image or the embedding request fails,
                including a non-200 response from the service (previously this
                case silently returned ``None``).
        """
        try:
            item = [{"image": self._image_to_data_uri(image)}]
            return self._call_multimodal(item)
        except Exception as e:
            raise Exception(f"图像embedding生成失败: {str(e)}") from e

    @observe(name="multimodal_embedding", as_type="embedding")
    def get_multimodal_embedding(
        self,
        text: Optional[str],
        image: Optional[Image.Image],
    ) -> List[float]:
        """Return the embedding vector of a text and/or an image.

        Args:
            text: Text content; skipped when ``None`` or blank.
            image: PIL image; skipped when ``None``.

        Returns:
            The first embedding returned by the multimodal model.

        Raises:
            Exception: If both inputs are missing, if encoding the image
                fails, or if the service returns a non-200 response.
        """
        try:
            item: List[Dict[str, str]] = []
            if image is not None:
                item.append({"image": self._image_to_data_uri(image)})
            # Only send text that is neither None nor whitespace-only.
            if text is not None and text.strip() != "":
                item.append({"text": text})
            if not item:
                # Fail fast instead of sending an empty request to the service.
                raise ValueError("text和image不能同时为空")
            return self._call_multimodal(item)
        except Exception as e:
            raise Exception(f"多模态embedding生成失败: {str(e)}") from e


# Global singleton accessor
@lru_cache(maxsize=1)
def get_embedding_model() -> Embedding:
    """Return the process-wide shared ``Embedding`` instance.

    The instance is built on first call from ``model_settings`` and cached,
    so later calls return the same object.

    Returns:
        The shared ``Embedding`` instance.
    """
    return Embedding(
        model_name=model_settings.embedding_model_name,
        api_key=model_settings.api_key,
    )