openrouter-mcp-multimodal/src/tool-handlers/multi-image-analysis.ts

import fetch from 'node-fetch';
import sharp from 'sharp';
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import OpenAI from 'openai';
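
/**
 * Arguments for the multi-image analysis tool handler: one or more image
 * sources plus a single prompt applied to all of them, with optional
 * response-format and model overrides.
 */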
export interface MultiImageAnalysisToolRequest {
  images: Array<{
    url: string;
    alt?: string;
  }>;
  prompt: string;
  markdown_response?: boolean;
  model?: string;
}
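
/**
 * Loads raw image bytes from a data: URL, a file:// URL, or an http(s) URL.
 */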
async function fetchImageAsBuffer(url: string): Promise<Buffer> {
  try {
    // Handle data URLs
    if (url.startsWith('data:')) {
      const matches = url.match(/^data:([A-Za-z-+\/]+);base64,(.+)$/);
      if (!matches || matches.length !== 3) {
        throw new Error('Invalid data URL');
      }
      return Buffer.from(matches[2], 'base64');
    }

    // Handle file URLs (fileURLToPath handles percent-encoding and Windows paths)
    if (url.startsWith('file://')) {
      const { fileURLToPath } = await import('url');
      const fs = await import('fs/promises');
      return await fs.readFile(fileURLToPath(url));
    }

    // Handle http/https URLs
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }
    return Buffer.from(await response.arrayBuffer());
  } catch (error) {
    console.error(`Error fetching image from ${url}:`, error);
    throw error;
  }
}
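
/**
 * Normalizes an image to JPEG (quality 80), downscaling so its larger side is
 * at most 800px, and returns the result as a base64 string.
 */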
async function processImage(buffer: Buffer): Promise<string> {
  try {
    // Get image metadata
    const metadata = await sharp(buffer).metadata();

    // Calculate dimensions to keep base64 size reasonable
    const MAX_DIMENSION = 800;
    const JPEG_QUALITY = 80;

    if (metadata.width && metadata.height) {
      const largerDimension = Math.max(metadata.width, metadata.height);
      if (largerDimension > MAX_DIMENSION) {
        const resizeOptions = metadata.width > metadata.height
          ? { width: MAX_DIMENSION }
          : { height: MAX_DIMENSION };
        const resizedBuffer = await sharp(buffer)
          .resize(resizeOptions)
          .jpeg({ quality: JPEG_QUALITY })
          .toBuffer();
        return resizedBuffer.toString('base64');
      }
    }

    // If no resizing needed, just convert to JPEG
    const jpegBuffer = await sharp(buffer)
      .jpeg({ quality: JPEG_QUALITY })
      .toBuffer();
    return jpegBuffer.toString('base64');
  } catch (error) {
    console.error('Error processing image:', error);
    throw error;
  }
}
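
/**
 * Tool handler: fetches and normalizes each requested image, then sends the
 * prompt plus all images to the selected OpenRouter model in a single chat
 * completion. Images that fail to load are skipped; if none succeed, an error
 * result is returned.
 */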
export async function handleMultiImageAnalysis(
  request: { params: { arguments: MultiImageAnalysisToolRequest } },
  openai: OpenAI,
  defaultModel?: string
) {
  const args = request.params.arguments;

  try {
    // Validate inputs
    if (!args.images || args.images.length === 0) {
      throw new McpError(ErrorCode.InvalidParams, 'At least one image is required');
    }
    if (!args.prompt) {
      throw new McpError(ErrorCode.InvalidParams, 'A prompt is required');
    }

    // Prepare content array for the message
    const content: Array<any> = [{
      type: 'text',
      text: args.prompt
    }];

    // Process each image
    for (const image of args.images) {
      try {
        // Fetch and process the image
        const imageBuffer = await fetchImageAsBuffer(image.url);
        const base64Image = await processImage(imageBuffer);

        // Add to content
        content.push({
          type: 'image_url',
          image_url: {
            url: `data:image/jpeg;base64,${base64Image}`
          }
        });
      } catch (error) {
        console.error(`Error processing image ${image.url}:`, error);
        // Continue with other images if one fails
      }
    }

    // If no images were successfully processed
    if (content.length === 1) {
      throw new Error('Failed to process any of the provided images');
    }

    // Select model
    const model = args.model || defaultModel || 'anthropic/claude-3.5-sonnet';

    // Make the API call
    const completion = await openai.chat.completions.create({
      model,
      messages: [{
        role: 'user',
        content
      }] as any
    });

    return {
      content: [
        {
          type: 'text',
          text: completion.choices[0].message.content || '',
        },
      ],
    };
  } catch (error) {
    console.error('Error in multi-image analysis:', error);
    if (error instanceof McpError) {
      throw error;
    }
    return {
      content: [
        {
          type: 'text',
          text: `Error analyzing images: ${error instanceof Error ? error.message : String(error)}`,
        },
      ],
      isError: true,
    };
  }
}
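
For context, here is a minimal sketch of how this handler could be exercised on its own, outside the MCP server. The OPENROUTER_API_KEY variable name, the example image URLs, and the relative import path are assumptions for illustration; in the actual server, the OpenAI client (pointed at OpenRouter's OpenAI-compatible endpoint) and the default model come from the server's configuration, and the request object is built by the MCP tool dispatch.

import OpenAI from 'openai';
import { handleMultiImageAnalysis } from './multi-image-analysis.js';

// Hypothetical standalone driver; the real server calls the handler from its
// MCP CallTool dispatch rather than directly like this.
async function main() {
  const openai = new OpenAI({
    apiKey: process.env.OPENROUTER_API_KEY,  // assumed env var name
    baseURL: 'https://openrouter.ai/api/v1', // OpenRouter's OpenAI-compatible endpoint
  });

  const result = await handleMultiImageAnalysis(
    {
      params: {
        arguments: {
          images: [
            { url: 'https://example.com/chart.png', alt: 'Quarterly revenue chart' },
            { url: 'file:///tmp/screenshot.jpg' },
          ],
          prompt: 'Compare these two images and describe the key differences.',
        },
      },
    },
    openai,
    'anthropic/claude-3.5-sonnet' // fallback when the request omits a model
  );

  console.log(result.content[0].text);
}

main().catch(console.error);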