Enhance analyze_image tool to support URLs, file paths, and data URIs

This commit is contained in:
stabgan
2025-03-27 13:19:32 +05:30
parent b8c6e0c8be
commit 74d2997547
2 changed files with 111 additions and 36 deletions

View File

@@ -139,6 +139,10 @@ export class ToolHandlers {
type: 'string', type: 'string',
description: 'Path to the image file to analyze (must be an absolute path)', description: 'Path to the image file to analyze (must be an absolute path)',
}, },
image_url: {
type: 'string',
description: 'URL or data URL of the image (can be a file:// URL, http(s):// URL, or data: URI)',
},
question: { question: {
type: 'string', type: 'string',
description: 'Question to ask about the image', description: 'Question to ask about the image',
@@ -148,7 +152,10 @@ export class ToolHandlers {
description: 'OpenRouter model to use (e.g., "anthropic/claude-3.5-sonnet")', description: 'OpenRouter model to use (e.g., "anthropic/claude-3.5-sonnet")',
}, },
}, },
required: ['image_path'], oneOf: [
{ required: ['image_path'] },
{ required: ['image_url'] }
]
}, },
}, },

View File

@@ -3,13 +3,86 @@ import { promises as fs } from 'fs';
import sharp from 'sharp'; import sharp from 'sharp';
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import OpenAI from 'openai'; import OpenAI from 'openai';
import fetch from 'node-fetch';
export interface AnalyzeImageToolRequest { export interface AnalyzeImageToolRequest {
image_path: string; image_path?: string;
image_url?: string;
question?: string; question?: string;
model?: string; model?: string;
} }
/**
 * Loads the raw bytes of an image from one of several kinds of source.
 *
 * Dispatch is by prefix of `url`:
 * - `data:`    — a data URI whose header ends in `;base64`; the payload after
 *                the first comma is base64-decoded. Any media type (including
 *                ones with digits, dots or parameters) is accepted.
 * - `file://`  — a file URL; it is parsed as a URL and handed to `fs.readFile`,
 *                so percent-escapes and Windows drive letters are resolved by
 *                Node rather than by string stripping.
 * - `http://` / `https://` — downloaded with `fetch`.
 * - anything else — treated as a local file path and read with `fs.readFile`.
 *
 * @param url - Data URI, file URL, HTTP(S) URL, or plain file path.
 * @returns The image bytes.
 * @throws Error with message `Invalid data URL` when a `data:` source has no
 *   comma, is not base64-encoded, or has an empty payload.
 * @throws Error with message `HTTP error! status: <code>` when the HTTP
 *   response is not OK.
 * @throws Whatever `fs.readFile`, `fetch` or the `URL` constructor raise; every
 *   failure is logged to stderr and then rethrown unchanged.
 */
async function fetchImageAsBuffer(url: string): Promise<Buffer> {
  try {
    // Handle data URLs
    if (url.startsWith('data:')) {
      // Split on the first comma instead of pattern-matching the media type:
      // the header may legally contain digits, dots and `;key=value` parameters.
      const commaIndex = url.indexOf(',');
      const header = commaIndex === -1 ? '' : url.slice('data:'.length, commaIndex);
      const payload = commaIndex === -1 ? '' : url.slice(commaIndex + 1);

      if (!/;base64$/i.test(header) || payload.length === 0) {
        throw new Error('Invalid data URL');
      }

      return Buffer.from(payload, 'base64');
    }

    // Handle file URLs
    if (url.startsWith('file://')) {
      // fs accepts WHATWG `file:` URL objects and performs the URL-to-path
      // conversion itself (percent-decoding, drive letters, host validation).
      return await fs.readFile(new URL(url));
    }

    // Handle http/https URLs
    if (url.startsWith('http://') || url.startsWith('https://')) {
      const response = await fetch(url);

      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }

      return Buffer.from(await response.arrayBuffer());
    }

    // Handle regular file paths
    return await fs.readFile(url);
  } catch (error) {
    // Keep the log readable: a data URI can carry megabytes of base64.
    const MAX_LOGGED_LENGTH = 120;
    const shownSource =
      url.length > MAX_LOGGED_LENGTH
        ? `${url.slice(0, MAX_LOGGED_LENGTH)}... (${url.length} chars)`
        : url;
    console.error(`Error fetching image from ${shownSource}:`, error);
    throw error;
  }
}
/**
 * Converts an image into a base64-encoded JPEG small enough to embed in a
 * request.
 *
 * The image is first auto-oriented from its EXIF orientation tag (JPEG output
 * drops metadata, so without this step rotated camera photos would be sent
 * sideways), then scaled down so that neither edge exceeds `MAX_DIMENSION`
 * while preserving aspect ratio. Images already within the bound are not
 * enlarged. The result is always re-encoded as JPEG at `JPEG_QUALITY`.
 *
 * @param buffer - Encoded image bytes in any format `sharp` can decode.
 * @returns The JPEG bytes as a base64 string (no `data:` prefix).
 * @throws Any error raised by `sharp` while decoding or encoding; it is logged
 *   to stderr and rethrown unchanged.
 */
async function processImage(buffer: Buffer): Promise<string> {
  // Bound on the longest edge, chosen to keep the base64 size reasonable
  const MAX_DIMENSION = 800;
  const JPEG_QUALITY = 80;

  try {
    const jpegBuffer = await sharp(buffer)
      // No-argument rotate() applies the EXIF orientation before resizing.
      .rotate()
      // 'inside' fits the whole image within the box; bounding both edges
      // keeps the limit correct even when orientation swaps width and height.
      .resize({
        width: MAX_DIMENSION,
        height: MAX_DIMENSION,
        fit: 'inside',
        withoutEnlargement: true,
      })
      .jpeg({ quality: JPEG_QUALITY })
      .toBuffer();

    return jpegBuffer.toString('base64');
  } catch (error) {
    console.error('Error processing image:', error);
    throw error;
  }
}
export async function handleAnalyzeImage( export async function handleAnalyzeImage(
request: { params: { arguments: AnalyzeImageToolRequest } }, request: { params: { arguments: AnalyzeImageToolRequest } },
openai: OpenAI, openai: OpenAI,
@@ -18,45 +91,40 @@ export async function handleAnalyzeImage(
const args = request.params.arguments; const args = request.params.arguments;
try { try {
// Validate image path // Validate image source
const imagePath = args.image_path; const imagePath = args.image_path;
if (!path.isAbsolute(imagePath)) { const imageUrl = args.image_url;
throw new McpError(ErrorCode.InvalidParams, 'Image path must be absolute');
if (!imagePath && !imageUrl) {
throw new McpError(ErrorCode.InvalidParams, 'Either image_path or image_url must be provided');
} }
// Read image file // Normalize the path/url
const imageBuffer = await fs.readFile(imagePath); let imageSource: string;
if (imageUrl) {
// Use the provided URL directly
imageSource = imageUrl;
} else if (imagePath) {
// For backward compatibility, try to handle the image_path
if (path.isAbsolute(imagePath)) {
// For absolute paths, use as a local file path
imageSource = imagePath;
} else {
// For relative paths, show a better error message
throw new McpError(ErrorCode.InvalidParams, 'Image path must be absolute or use image_url with file:// prefix');
}
} else {
// This shouldn't happen due to the check above, but TypeScript doesn't know that
throw new McpError(ErrorCode.InvalidParams, 'No image source provided');
}
// Fetch and process the image
const imageBuffer = await fetchImageAsBuffer(imageSource);
console.error(`Successfully read image buffer of size: ${imageBuffer.length}`); console.error(`Successfully read image buffer of size: ${imageBuffer.length}`);
// Get image metadata // Process the image (resize if needed)
const metadata = await sharp(imageBuffer).metadata(); const base64Image = await processImage(imageBuffer);
console.error('Image metadata:', metadata);
// Calculate dimensions to keep base64 size reasonable
const MAX_DIMENSION = 800; // Larger than original example for better quality
const JPEG_QUALITY = 80; // Higher quality
let resizedBuffer = imageBuffer;
if (metadata.width && metadata.height) {
const largerDimension = Math.max(metadata.width, metadata.height);
if (largerDimension > MAX_DIMENSION) {
const resizeOptions = metadata.width > metadata.height
? { width: MAX_DIMENSION }
: { height: MAX_DIMENSION };
resizedBuffer = await sharp(imageBuffer)
.resize(resizeOptions)
.jpeg({ quality: JPEG_QUALITY })
.toBuffer();
} else {
resizedBuffer = await sharp(imageBuffer)
.jpeg({ quality: JPEG_QUALITY })
.toBuffer();
}
}
// Convert to base64
const base64Image = resizedBuffer.toString('base64');
// Select model // Select model
const model = args.model || defaultModel || 'anthropic/claude-3.5-sonnet'; const model = args.model || defaultModel || 'anthropic/claude-3.5-sonnet';