fix: Improved base64 image handling and Windows compatibility
This commit is contained in:
@@ -15,7 +15,7 @@ class OpenRouterMultimodalServer {
|
||||
constructor() {
|
||||
// Retrieve API key and default model from environment variables
|
||||
const apiKey = process.env.OPENROUTER_API_KEY;
|
||||
const defaultModel = process.env.DEFAULT_MODEL || DEFAULT_MODEL;
|
||||
const defaultModel = process.env.OPENROUTER_DEFAULT_MODEL || DEFAULT_MODEL;
|
||||
|
||||
// Check if API key is provided
|
||||
if (!apiKey) {
|
||||
|
||||
@@ -137,7 +137,7 @@ export class ToolHandlers {
|
||||
properties: {
|
||||
image_path: {
|
||||
type: 'string',
|
||||
description: 'Path to the image file to analyze (must be an absolute path)',
|
||||
description: 'Path to the image file to analyze (can be an absolute file path, URL, or base64 data URL starting with "data:")',
|
||||
},
|
||||
question: {
|
||||
type: 'string',
|
||||
@@ -167,7 +167,7 @@ export class ToolHandlers {
|
||||
properties: {
|
||||
url: {
|
||||
type: 'string',
|
||||
description: 'URL or data URL of the image (use file:// URL prefix for local files, http(s):// for web images, or data: for base64 encoded images)',
|
||||
description: 'URL or data URL of the image (use http(s):// for web images, absolute file paths for local files, or data:image/xxx;base64,... for base64 encoded images)',
|
||||
},
|
||||
alt: {
|
||||
type: 'string',
|
||||
|
||||
@@ -1,14 +1,32 @@
|
||||
import path from 'path';
|
||||
import { promises as fs } from 'fs';
|
||||
import sharp from 'sharp';
|
||||
import fetch from 'node-fetch';
|
||||
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
|
||||
import OpenAI from 'openai';
|
||||
import fetch from 'node-fetch';
|
||||
import { findSuitableFreeModel } from './multi-image-analysis.js';
|
||||
|
||||
// Default model for image analysis
|
||||
const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
|
||||
|
||||
/**
 * Image-processing backend.
 *
 * `sharp` is loaded defensively so this module still loads when the library
 * cannot be loaded. On failure a pass-through stand-in with the same call
 * shape is installed:
 *   - `metadata()` resolves to a fixed 800x600 size (not the real dimensions);
 *   - `resize().jpeg().toBuffer()` and `jpeg().toBuffer()` resolve to the
 *     input Buffer unchanged, i.e. nothing is resized or re-encoded.
 *
 * NOTE(review): the stand-in is itself a function, so a
 * `typeof sharp !== 'function'` check cannot tell it apart from the real
 * library.
 * NOTE(review): `require` is not defined in ES modules; if this file is
 * emitted as ESM the catch branch would always run - confirm module format.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- callers use the untyped sharp call chain
let sharp: any;
try {
  sharp = require('sharp');
} catch (e) {
  // Report why loading failed instead of discarding the error, so a broken
  // install can be told apart from other load failures in the logs.
  const reason = e instanceof Error ? e.message : String(e);
  console.error(`Warning: sharp module not available, using fallback image processing (${reason})`);

  // Stand-in that hands back the original, unprocessed Buffer.
  sharp = (buffer: Buffer) => ({
    metadata: async () => ({ width: 800, height: 600 }),
    resize: () => ({
      jpeg: () => ({
        toBuffer: async () => buffer,
      }),
    }),
    jpeg: () => ({
      toBuffer: async () => buffer,
    }),
  });
}
|
||||
|
||||
export interface AnalyzeImageToolRequest {
|
||||
image_path: string;
|
||||
question?: string;
|
||||
@@ -49,10 +67,34 @@ async function fetchImageAsBuffer(url: string): Promise<Buffer> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Minimal image "processing" used when the image cannot be run through sharp.
 *
 * The bytes are not resized or re-encoded; they are only base64-encoded.
 *
 * @param buffer - Raw image bytes.
 * @returns The base64 encoding of `buffer`.
 * @throws Rethrows any error raised while encoding, after logging it.
 */
async function processImageFallback(buffer: Buffer): Promise<string> {
  let encoded: string;
  try {
    // No processing at all: hand back the bytes as base64.
    encoded = buffer.toString('base64');
  } catch (failure) {
    console.error('Error in fallback image processing:', failure);
    throw failure;
  }
  return encoded;
}
|
||||
|
||||
async function processImage(buffer: Buffer): Promise<string> {
|
||||
try {
|
||||
if (typeof sharp !== 'function') {
|
||||
console.warn('Using fallback image processing (sharp not available)');
|
||||
return processImageFallback(buffer);
|
||||
}
|
||||
|
||||
// Get image metadata
|
||||
const metadata = await sharp(buffer).metadata();
|
||||
let metadata;
|
||||
try {
|
||||
metadata = await sharp(buffer).metadata();
|
||||
} catch (error) {
|
||||
console.warn('Error getting image metadata, using fallback:', error);
|
||||
return processImageFallback(buffer);
|
||||
}
|
||||
|
||||
// Calculate dimensions to keep base64 size reasonable
|
||||
const MAX_DIMENSION = 800;
|
||||
@@ -81,39 +123,56 @@ async function processImage(buffer: Buffer): Promise<string> {
|
||||
|
||||
return jpegBuffer.toString('base64');
|
||||
} catch (error) {
|
||||
console.error('Error processing image:', error);
|
||||
throw error;
|
||||
console.error('Error processing image, using fallback:', error);
|
||||
return processImageFallback(buffer);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the image at the given path to a base64 string
|
||||
* Processes an image from a path or base64 string to a proper base64 format for APIs
|
||||
*/
|
||||
async function imageToBase64(imagePath: string): Promise<{ base64: string; mimeType: string }> {
|
||||
async function prepareImage(imagePath: string): Promise<{ base64: string; mimeType: string }> {
|
||||
try {
|
||||
// Ensure the image path is absolute
|
||||
if (!path.isAbsolute(imagePath)) {
|
||||
throw new McpError(
|
||||
ErrorCode.InvalidParams,
|
||||
'Image path must be absolute'
|
||||
);
|
||||
// Check if already a base64 data URL
|
||||
if (imagePath.startsWith('data:')) {
|
||||
const matches = imagePath.match(/^data:([A-Za-z-+\/]+);base64,(.+)$/);
|
||||
if (!matches || matches.length !== 3) {
|
||||
throw new McpError(ErrorCode.InvalidParams, 'Invalid base64 data URL format');
|
||||
}
|
||||
return { base64: matches[2], mimeType: matches[1] };
|
||||
}
|
||||
|
||||
// Check if image is a URL
|
||||
if (imagePath.startsWith('http://') || imagePath.startsWith('https://')) {
|
||||
try {
|
||||
const buffer = await fetchImageAsBuffer(imagePath);
|
||||
const processed = await processImage(buffer);
|
||||
return { base64: processed, mimeType: 'image/jpeg' }; // We convert everything to JPEG
|
||||
} catch (error: any) {
|
||||
throw new McpError(ErrorCode.InvalidParams, `Failed to fetch image from URL: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle file paths
|
||||
let absolutePath = imagePath;
|
||||
|
||||
// Ensure the image path is absolute if it's a file path
|
||||
if (!imagePath.startsWith('data:') && !path.isAbsolute(imagePath)) {
|
||||
throw new McpError(ErrorCode.InvalidParams, 'Image path must be absolute');
|
||||
}
|
||||
|
||||
// Check if the file exists
|
||||
try {
|
||||
await fs.access(imagePath);
|
||||
// Check if the file exists
|
||||
await fs.access(absolutePath);
|
||||
} catch (error) {
|
||||
throw new McpError(
|
||||
ErrorCode.InvalidParams,
|
||||
`File not found: ${imagePath}`
|
||||
);
|
||||
throw new McpError(ErrorCode.InvalidParams, `File not found: ${absolutePath}`);
|
||||
}
|
||||
|
||||
// Read the file as a buffer
|
||||
const buffer = await fs.readFile(imagePath);
|
||||
const buffer = await fs.readFile(absolutePath);
|
||||
|
||||
// Determine MIME type from file extension
|
||||
const extension = path.extname(imagePath).toLowerCase();
|
||||
const extension = path.extname(absolutePath).toLowerCase();
|
||||
let mimeType: string;
|
||||
|
||||
switch (extension) {
|
||||
@@ -137,12 +196,11 @@ async function imageToBase64(imagePath: string): Promise<{ base64: string; mimeT
|
||||
mimeType = 'application/octet-stream';
|
||||
}
|
||||
|
||||
// Convert buffer to base64
|
||||
const base64 = buffer.toString('base64');
|
||||
|
||||
return { base64, mimeType };
|
||||
// Process and optimize the image
|
||||
const processed = await processImage(buffer);
|
||||
return { base64: processed, mimeType };
|
||||
} catch (error) {
|
||||
console.error('Error converting image to base64:', error);
|
||||
console.error('Error preparing image:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
@@ -160,23 +218,21 @@ export async function handleAnalyzeImage(
|
||||
try {
|
||||
// Validate inputs
|
||||
if (!args.image_path) {
|
||||
throw new McpError(ErrorCode.InvalidParams, 'An image path is required');
|
||||
throw new McpError(ErrorCode.InvalidParams, 'An image path, URL, or base64 data is required');
|
||||
}
|
||||
|
||||
if (!args.question) {
|
||||
throw new McpError(ErrorCode.InvalidParams, 'A question about the image is required');
|
||||
}
|
||||
const question = args.question || "What's in this image?";
|
||||
|
||||
console.error(`Processing image: ${args.image_path}`);
|
||||
console.error(`Processing image: ${args.image_path.substring(0, 100)}${args.image_path.length > 100 ? '...' : ''}`);
|
||||
|
||||
// Convert the image to base64
|
||||
const { base64, mimeType } = await imageToBase64(args.image_path);
|
||||
const { base64, mimeType } = await prepareImage(args.image_path);
|
||||
|
||||
// Create the content array for the OpenAI API
|
||||
const content = [
|
||||
{
|
||||
type: 'text',
|
||||
text: args.question
|
||||
text: question
|
||||
},
|
||||
{
|
||||
type: 'image_url',
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import fetch from 'node-fetch';
|
||||
import sharp from 'sharp';
|
||||
// The static sharp import is removed: sharp is loaded below through a guarded require() with a fallback
|
||||
// import sharp from 'sharp';
|
||||
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
|
||||
import OpenAI from 'openai';
|
||||
import path from 'path';
|
||||
@@ -8,6 +9,26 @@ import { tmpdir } from 'os';
|
||||
// Remove uuid import as we'll use a simple random string generator instead
|
||||
// import { v4 as uuidv4 } from 'uuid';
|
||||
|
||||
/**
 * Setup sharp with fallback.
 *
 * `sharp` is loaded defensively so this module still loads when the library
 * cannot be loaded. On failure a pass-through stand-in with the same call
 * shape is installed:
 *   - `metadata()` resolves to a fixed 800x600 size (not the real dimensions);
 *   - `resize().jpeg().toBuffer()` and `jpeg().toBuffer()` resolve to the
 *     input Buffer unchanged, i.e. nothing is resized or re-encoded.
 *
 * NOTE(review): the stand-in is itself a function, so a
 * `typeof sharp !== 'function'` check cannot tell it apart from the real
 * library.
 * NOTE(review): `require` is not defined in ES modules; if this file is
 * emitted as ESM the catch branch would always run - confirm module format.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- callers use the untyped sharp call chain
let sharp: any;
try {
  sharp = require('sharp');
} catch (e) {
  // Report why loading failed instead of discarding the error, so a broken
  // install can be told apart from other load failures in the logs.
  const reason = e instanceof Error ? e.message : String(e);
  console.error(`Warning: sharp module not available, using fallback image processing (${reason})`);

  // Stand-in that hands back the original, unprocessed Buffer.
  sharp = (buffer: Buffer) => ({
    metadata: async () => ({ width: 800, height: 600 }),
    resize: () => ({
      jpeg: () => ({
        toBuffer: async () => buffer,
      }),
    }),
    jpeg: () => ({
      toBuffer: async () => buffer,
    }),
  });
}
|
||||
|
||||
// Default model for image analysis
|
||||
const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
|
||||
|
||||
@@ -149,13 +170,25 @@ async function fetchImageAsBuffer(url: string): Promise<Buffer> {
|
||||
*/
|
||||
async function processImage(buffer: Buffer, mimeType: string): Promise<string> {
|
||||
try {
|
||||
if (typeof sharp !== 'function') {
|
||||
console.warn('Using fallback image processing (sharp not available)');
|
||||
return processImageFallback(buffer, mimeType);
|
||||
}
|
||||
|
||||
// Create a temporary directory for processing if needed
|
||||
const tempDir = path.join(tmpdir(), `openrouter-mcp-${generateRandomId()}`);
|
||||
await fs.mkdir(tempDir, { recursive: true });
|
||||
|
||||
// Get image info
|
||||
let sharpInstance = sharp(buffer);
|
||||
const metadata = await sharpInstance.metadata();
|
||||
let metadata;
|
||||
|
||||
try {
|
||||
metadata = await sharpInstance.metadata();
|
||||
} catch (error) {
|
||||
console.warn('Error getting image metadata, using fallback:', error);
|
||||
return processImageFallback(buffer, mimeType);
|
||||
}
|
||||
|
||||
// Skip processing for small images
|
||||
if (metadata.width && metadata.height &&
|
||||
@@ -177,19 +210,20 @@ async function processImage(buffer: Buffer, mimeType: string): Promise<string> {
|
||||
}
|
||||
}
|
||||
|
||||
// Convert to JPEG for consistency and small size
|
||||
const processedBuffer = await sharpInstance
|
||||
.jpeg({ quality: JPEG_QUALITY })
|
||||
.toBuffer();
|
||||
|
||||
return processedBuffer.toString('base64');
|
||||
try {
|
||||
// Convert to JPEG for consistency and small size
|
||||
const processedBuffer = await sharpInstance
|
||||
.jpeg({ quality: JPEG_QUALITY })
|
||||
.toBuffer();
|
||||
|
||||
return processedBuffer.toString('base64');
|
||||
} catch (error) {
|
||||
console.warn('Error in final image processing, using fallback:', error);
|
||||
return processImageFallback(buffer, mimeType);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error processing image:', error);
|
||||
|
||||
// If sharp processing fails, return the original buffer
|
||||
// This is a fallback to ensure we don't completely fail on processing errors
|
||||
console.error('Returning original image without processing');
|
||||
return buffer.toString('base64');
|
||||
console.error('Error processing image, using fallback:', error);
|
||||
return processImageFallback(buffer, mimeType);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -265,7 +299,7 @@ export async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Process and analyze multiple images using OpenRouter
|
||||
* Main handler for multi-image analysis
|
||||
*/
|
||||
export async function handleMultiImageAnalysis(
|
||||
request: { params: { arguments: MultiImageAnalysisToolRequest } },
|
||||
@@ -276,65 +310,50 @@ export async function handleMultiImageAnalysis(
|
||||
|
||||
try {
|
||||
// Validate inputs
|
||||
if (!args.images || args.images.length === 0) {
|
||||
if (!args.images || !Array.isArray(args.images) || args.images.length === 0) {
|
||||
throw new McpError(ErrorCode.InvalidParams, 'At least one image is required');
|
||||
}
|
||||
|
||||
if (!args.prompt) {
|
||||
throw new McpError(ErrorCode.InvalidParams, 'A prompt is required');
|
||||
throw new McpError(ErrorCode.InvalidParams, 'A prompt for analyzing the images is required');
|
||||
}
|
||||
|
||||
// Prepare content array for the message
|
||||
const content: Array<any> = [{
|
||||
type: 'text',
|
||||
text: args.prompt
|
||||
}];
|
||||
console.error(`Processing ${args.images.length} images`);
|
||||
|
||||
// Track successful and failed images for reporting
|
||||
const successfulImages = [];
|
||||
const failedImages = [];
|
||||
|
||||
// Process each image
|
||||
for (const [index, image] of args.images.entries()) {
|
||||
try {
|
||||
console.error(`Processing image ${index + 1}/${args.images.length}: ${image.url.substring(0, 50)}...`);
|
||||
|
||||
// Get MIME type
|
||||
const mimeType = getMimeType(image.url);
|
||||
|
||||
// Fetch and process the image
|
||||
const imageBuffer = await fetchImageAsBuffer(image.url);
|
||||
const base64Image = await processImage(imageBuffer, mimeType);
|
||||
|
||||
// Use JPEG as the output format for consistency
|
||||
const outputMimeType = 'image/jpeg';
|
||||
|
||||
// Add to content
|
||||
content.push({
|
||||
type: 'image_url',
|
||||
image_url: {
|
||||
url: `data:${outputMimeType};base64,${base64Image}`
|
||||
// Process each image and convert to base64 if needed
|
||||
const processedImages = await Promise.all(
|
||||
args.images.map(async (image, index) => {
|
||||
try {
|
||||
// Skip processing if already a data URL
|
||||
if (image.url.startsWith('data:')) {
|
||||
console.error(`Image ${index + 1} is already in base64 format`);
|
||||
return image;
|
||||
}
|
||||
});
|
||||
|
||||
successfulImages.push(image.url);
|
||||
} catch (error) {
|
||||
console.error(`Error processing image ${index + 1} (${image.url.substring(0, 30)}...):`, error);
|
||||
failedImages.push({url: image.url, error: error instanceof Error ? error.message : String(error)});
|
||||
// Continue with other images if one fails
|
||||
}
|
||||
}
|
||||
|
||||
// If no images were successfully processed
|
||||
if (content.length === 1) {
|
||||
const errorDetails = failedImages.map(img => `${img.url.substring(0, 30)}...: ${img.error}`).join('; ');
|
||||
throw new Error(`Failed to process any of the provided images. Errors: ${errorDetails}`);
|
||||
}
|
||||
|
||||
console.error(`Processing image ${index + 1}: ${image.url.substring(0, 100)}${image.url.length > 100 ? '...' : ''}`);
|
||||
|
||||
// Get MIME type
|
||||
const mimeType = getMimeType(image.url);
|
||||
|
||||
// Fetch and process the image
|
||||
const buffer = await fetchImageAsBuffer(image.url);
|
||||
const base64 = await processImage(buffer, mimeType);
|
||||
|
||||
return {
|
||||
url: `data:${mimeType === 'application/octet-stream' ? 'image/jpeg' : mimeType};base64,${base64}`,
|
||||
alt: image.alt
|
||||
};
|
||||
} catch (error: any) {
|
||||
console.error(`Error processing image ${index + 1}:`, error);
|
||||
throw new Error(`Failed to process image ${index + 1}: ${image.url}. Error: ${error.message}`);
|
||||
}
|
||||
})
|
||||
);
|
||||
|
||||
// Select model with priority:
|
||||
// 1. User-specified model
|
||||
// 2. Default model from environment
|
||||
// 3. Default free vision model (qwen/qwen2.5-vl-32b-instruct:free)
|
||||
// 3. Default free vision model
|
||||
let model = args.model || defaultModel || DEFAULT_FREE_MODEL;
|
||||
|
||||
// If a model is specified but not our default free model, verify it exists
|
||||
@@ -348,7 +367,30 @@ export async function handleMultiImageAnalysis(
|
||||
}
|
||||
|
||||
console.error(`Making API call with model: ${model}`);
|
||||
console.error(`Successfully processed ${successfulImages.length} images, ${failedImages.length} failed`);
|
||||
|
||||
// Build content array for the API call
|
||||
const content: Array<{
|
||||
type: string;
|
||||
text?: string;
|
||||
image_url?: {
|
||||
url: string
|
||||
}
|
||||
}> = [
|
||||
{
|
||||
type: 'text',
|
||||
text: args.prompt
|
||||
}
|
||||
];
|
||||
|
||||
// Add each processed image to the content array
|
||||
processedImages.forEach(image => {
|
||||
content.push({
|
||||
type: 'image_url',
|
||||
image_url: {
|
||||
url: image.url
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// Make the API call
|
||||
const completion = await openai.chat.completions.create({
|
||||
@@ -359,16 +401,19 @@ export async function handleMultiImageAnalysis(
|
||||
}] as any
|
||||
});
|
||||
|
||||
// Format the response
|
||||
// Get response text and format if requested
|
||||
let responseText = completion.choices[0].message.content || '';
|
||||
|
||||
// Add information about failed images if any
|
||||
if (failedImages.length > 0) {
|
||||
const formattedErrors = args.markdown_response !== false
|
||||
? `\n\n---\n\n**Note:** ${failedImages.length} image(s) could not be processed:\n${failedImages.map((img, i) => `- Image ${i+1}: ${img.error}`).join('\n')}`
|
||||
: `\n\nNote: ${failedImages.length} image(s) could not be processed: ${failedImages.map((img, i) => `Image ${i+1}: ${img.error}`).join('; ')}`;
|
||||
|
||||
responseText += formattedErrors;
|
||||
// Format as markdown if requested
|
||||
if (args.markdown_response) {
|
||||
// Simple formatting enhancements
|
||||
responseText = responseText
|
||||
// Add horizontal rule after sections
|
||||
.replace(/^(#{1,3}.*)/gm, '$1\n\n---')
|
||||
// Ensure proper spacing for lists
|
||||
.replace(/^(\s*[-*•]\s.+)$/gm, '\n$1')
|
||||
// Convert plain URLs to markdown links
|
||||
.replace(/(https?:\/\/[^\s]+)/g, '[$1]($1)');
|
||||
}
|
||||
|
||||
// Return the analysis result
|
||||
@@ -381,12 +426,10 @@ export async function handleMultiImageAnalysis(
|
||||
],
|
||||
metadata: {
|
||||
model: completion.model,
|
||||
usage: completion.usage,
|
||||
successful_images: successfulImages.length,
|
||||
failed_images: failedImages.length
|
||||
usage: completion.usage
|
||||
}
|
||||
};
|
||||
} catch (error) {
|
||||
} catch (error: any) {
|
||||
console.error('Error in multi-image analysis:', error);
|
||||
|
||||
if (error instanceof McpError) {
|
||||
@@ -397,14 +440,27 @@ export async function handleMultiImageAnalysis(
|
||||
content: [
|
||||
{
|
||||
type: 'text',
|
||||
text: `Error analyzing images: ${error instanceof Error ? error.message : String(error)}`,
|
||||
text: `Error analyzing images: ${error.message}`,
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
metadata: {
|
||||
error_type: error instanceof Error ? error.constructor.name : 'Unknown',
|
||||
error_message: error instanceof Error ? error.message : String(error)
|
||||
error_type: error.constructor.name,
|
||||
error_message: error.message
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Minimal image "processing" used when the image cannot be run through sharp.
 *
 * The bytes are not resized or re-encoded; they are only base64-encoded.
 *
 * @param buffer - Raw image bytes.
 * @param mimeType - MIME type of the image; accepted for signature parity
 *   with `processImage` but not read by this function.
 * @returns The base64 encoding of `buffer`.
 * @throws Rethrows any error raised while encoding, after logging it.
 */
async function processImageFallback(buffer: Buffer, mimeType: string): Promise<string> {
  let encoded: string;
  try {
    // No processing at all: hand back the bytes as base64.
    encoded = buffer.toString('base64');
  } catch (failure) {
    console.error('Error in fallback image processing:', failure);
    throw failure;
  }
  return encoded;
}
|
||||
|
||||
Reference in New Issue
Block a user