Enhanced image analysis capabilities with improved handlers and default model setup

This commit is contained in:
stabgan
2025-03-27 15:51:54 +05:30
parent 3f9840d884
commit baf1270e89
4 changed files with 405 additions and 120 deletions

View File

@@ -5,14 +5,17 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
import { ToolHandlers } from './tool-handlers.js'; import { ToolHandlers } from './tool-handlers.js';
// Define the default model to use when none is specified
const DEFAULT_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
class OpenRouterMultimodalServer { class OpenRouterMultimodalServer {
private server: Server; private server: Server;
private toolHandlers!: ToolHandlers; // Using definite assignment assertion private toolHandlers!: ToolHandlers; // Using definite assignment assertion
constructor() { constructor() {
// Get API key and default model from environment variables // Retrieve API key and default model from environment variables
const apiKey = process.env.OPENROUTER_API_KEY; const apiKey = process.env.OPENROUTER_API_KEY;
const defaultModel = process.env.OPENROUTER_DEFAULT_MODEL; const defaultModel = process.env.DEFAULT_MODEL || DEFAULT_MODEL;
// Check if API key is provided // Check if API key is provided
if (!apiKey) { if (!apiKey) {
@@ -55,11 +58,10 @@ class OpenRouterMultimodalServer {
console.error('Using API key from environment variable'); console.error('Using API key from environment variable');
console.error('Note: To use OpenRouter Multimodal, add the API key to your environment variables:'); console.error('Note: To use OpenRouter Multimodal, add the API key to your environment variables:');
console.error(' OPENROUTER_API_KEY=your-api-key'); console.error(' OPENROUTER_API_KEY=your-api-key');
if (process.env.OPENROUTER_DEFAULT_MODEL) {
console.error(` Using default model: ${process.env.OPENROUTER_DEFAULT_MODEL}`); const modelDisplay = process.env.OPENROUTER_DEFAULT_MODEL || DEFAULT_MODEL;
} else { console.error(` Using default model: ${modelDisplay}`);
console.error(' No default model set. You will need to specify a model in each request.'); console.error('Server is ready to process tool calls. Waiting for input...');
}
} }
} }

View File

@@ -16,6 +16,7 @@ import { handleSearchModels, SearchModelsToolRequest } from './tool-handlers/sea
import { handleGetModelInfo, GetModelInfoToolRequest } from './tool-handlers/get-model-info.js'; import { handleGetModelInfo, GetModelInfoToolRequest } from './tool-handlers/get-model-info.js';
import { handleValidateModel, ValidateModelToolRequest } from './tool-handlers/validate-model.js'; import { handleValidateModel, ValidateModelToolRequest } from './tool-handlers/validate-model.js';
import { handleMultiImageAnalysis, MultiImageAnalysisToolRequest } from './tool-handlers/multi-image-analysis.js'; import { handleMultiImageAnalysis, MultiImageAnalysisToolRequest } from './tool-handlers/multi-image-analysis.js';
import { handleAnalyzeImage, AnalyzeImageToolRequest } from './tool-handlers/analyze-image.js';
export class ToolHandlers { export class ToolHandlers {
private server: Server; private server: Server;
@@ -51,7 +52,7 @@ export class ToolHandlers {
tools: [ tools: [
// Chat Completion Tool // Chat Completion Tool
{ {
name: 'chat_completion', name: 'mcp_openrouter_chat_completion',
description: 'Send a message to OpenRouter.ai and get a response', description: 'Send a message to OpenRouter.ai and get a response',
inputSchema: { inputSchema: {
type: 'object', type: 'object',
@@ -127,10 +128,34 @@ export class ToolHandlers {
maxContextTokens: 200000 maxContextTokens: 200000
}, },
// Single Image Analysis Tool
{
name: 'mcp_openrouter_analyze_image',
description: 'Analyze an image using OpenRouter vision models',
inputSchema: {
type: 'object',
properties: {
image_path: {
type: 'string',
description: 'Path to the image file to analyze (must be an absolute path)',
},
question: {
type: 'string',
description: 'Question to ask about the image',
},
model: {
type: 'string',
description: 'OpenRouter model to use (e.g., "anthropic/claude-3.5-sonnet")',
},
},
required: ['image_path'],
},
},
// Multi-Image Analysis Tool // Multi-Image Analysis Tool
{ {
name: 'multi_image_analysis', name: 'mcp_openrouter_multi_image_analysis',
description: 'Analyze one or more images with a prompt and receive detailed responses', description: 'Analyze multiple images at once with a single prompt and receive detailed responses',
inputSchema: { inputSchema: {
type: 'object', type: 'object',
properties: { properties: {
@@ -269,14 +294,21 @@ export class ToolHandlers {
this.server.setRequestHandler(CallToolRequestSchema, async (request) => { this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
switch (request.params.name) { switch (request.params.name) {
case 'chat_completion': case 'mcp_openrouter_chat_completion':
return handleChatCompletion({ return handleChatCompletion({
params: { params: {
arguments: request.params.arguments as unknown as ChatCompletionToolRequest arguments: request.params.arguments as unknown as ChatCompletionToolRequest
} }
}, this.openai, this.defaultModel); }, this.openai, this.defaultModel);
case 'multi_image_analysis': case 'mcp_openrouter_analyze_image':
return handleAnalyzeImage({
params: {
arguments: request.params.arguments as unknown as AnalyzeImageToolRequest
}
}, this.openai, this.defaultModel);
case 'mcp_openrouter_multi_image_analysis':
return handleMultiImageAnalysis({ return handleMultiImageAnalysis({
params: { params: {
arguments: request.params.arguments as unknown as MultiImageAnalysisToolRequest arguments: request.params.arguments as unknown as MultiImageAnalysisToolRequest

View File

@@ -4,10 +4,13 @@ import sharp from 'sharp';
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import OpenAI from 'openai'; import OpenAI from 'openai';
import fetch from 'node-fetch'; import fetch from 'node-fetch';
import { findSuitableFreeModel } from './multi-image-analysis.js';
// Default model for image analysis
const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
export interface AnalyzeImageToolRequest { export interface AnalyzeImageToolRequest {
image_path?: string; image_path: string;
image_url?: string;
question?: string; question?: string;
model?: string; model?: string;
} }
@@ -83,6 +86,70 @@ async function processImage(buffer: Buffer): Promise<string> {
} }
} }
/**
* Converts the image at the given path to a base64 string
*/
async function imageToBase64(imagePath: string): Promise<{ base64: string; mimeType: string }> {
try {
// Ensure the image path is absolute
if (!path.isAbsolute(imagePath)) {
throw new McpError(
ErrorCode.InvalidParams,
'Image path must be absolute'
);
}
// Check if the file exists
try {
await fs.access(imagePath);
} catch (error) {
throw new McpError(
ErrorCode.InvalidParams,
`File not found: ${imagePath}`
);
}
// Read the file as a buffer
const buffer = await fs.readFile(imagePath);
// Determine MIME type from file extension
const extension = path.extname(imagePath).toLowerCase();
let mimeType: string;
switch (extension) {
case '.png':
mimeType = 'image/png';
break;
case '.jpg':
case '.jpeg':
mimeType = 'image/jpeg';
break;
case '.webp':
mimeType = 'image/webp';
break;
case '.gif':
mimeType = 'image/gif';
break;
case '.bmp':
mimeType = 'image/bmp';
break;
default:
mimeType = 'application/octet-stream';
}
// Convert buffer to base64
const base64 = buffer.toString('base64');
return { base64, mimeType };
} catch (error) {
console.error('Error converting image to base64:', error);
throw error;
}
}
/**
* Handler for analyzing a single image
*/
export async function handleAnalyzeImage( export async function handleAnalyzeImage(
request: { params: { arguments: AnalyzeImageToolRequest } }, request: { params: { arguments: AnalyzeImageToolRequest } },
openai: OpenAI, openai: OpenAI,
@@ -91,71 +158,62 @@ export async function handleAnalyzeImage(
const args = request.params.arguments; const args = request.params.arguments;
try { try {
// Validate image source // Validate inputs
const imagePath = args.image_path; if (!args.image_path) {
const imageUrl = args.image_url; throw new McpError(ErrorCode.InvalidParams, 'An image path is required');
if (!imagePath && !imageUrl) {
throw new McpError(ErrorCode.InvalidParams, 'Either image_path or image_url must be provided');
} }
// Normalize the path/url if (!args.question) {
let imageSource: string; throw new McpError(ErrorCode.InvalidParams, 'A question about the image is required');
if (imageUrl) {
// Use the provided URL directly
imageSource = imageUrl;
} else if (imagePath) {
// For backward compatibility, try to handle the image_path
if (path.isAbsolute(imagePath)) {
// For absolute paths, use as a local file path
imageSource = imagePath;
} else {
// For relative paths, show a better error message
throw new McpError(ErrorCode.InvalidParams, 'Image path must be absolute or use image_url with file:// prefix');
}
} else {
// This shouldn't happen due to the check above, but TypeScript doesn't know that
throw new McpError(ErrorCode.InvalidParams, 'No image source provided');
} }
// Fetch and process the image console.error(`Processing image: ${args.image_path}`);
const imageBuffer = await fetchImageAsBuffer(imageSource);
console.error(`Successfully read image buffer of size: ${imageBuffer.length}`);
// Process the image (resize if needed) // Convert the image to base64
const base64Image = await processImage(imageBuffer); const { base64, mimeType } = await imageToBase64(args.image_path);
// Select model // Create the content array for the OpenAI API
const model = args.model || defaultModel || 'anthropic/claude-3.5-sonnet'; const content = [
// Prepare message with image
const messages = [
{
role: 'user',
content: [
{ {
type: 'text', type: 'text',
text: args.question || "What's in this image?" text: args.question
}, },
{ {
type: 'image_url', type: 'image_url',
image_url: { image_url: {
url: `data:image/jpeg;base64,${base64Image}` url: `data:${mimeType};base64,${base64}`
} }
} }
]
}
]; ];
console.error('Sending request to OpenRouter...'); // Select model with priority:
// 1. User-specified model
// 2. Default model from environment
// 3. Default free vision model (qwen/qwen2.5-vl-32b-instruct:free)
let model = args.model || defaultModel || DEFAULT_FREE_MODEL;
// Call OpenRouter API // If a model is specified but not our default free model, verify it exists
if (model !== DEFAULT_FREE_MODEL) {
try {
await openai.models.retrieve(model);
} catch (error) {
console.error(`Specified model ${model} not found, falling back to auto-selection`);
model = await findSuitableFreeModel(openai);
}
}
console.error(`Making API call with model: ${model}`);
// Make the API call
const completion = await openai.chat.completions.create({ const completion = await openai.chat.completions.create({
model, model,
messages: messages as any, messages: [{
role: 'user',
content
}] as any
}); });
// Return the analysis result
return { return {
content: [ content: [
{ {
@@ -163,9 +221,13 @@ export async function handleAnalyzeImage(
text: completion.choices[0].message.content || '', text: completion.choices[0].message.content || '',
}, },
], ],
metadata: {
model: completion.model,
usage: completion.usage
}
}; };
} catch (error) { } catch (error) {
console.error('Error analyzing image:', error); console.error('Error in image analysis:', error);
if (error instanceof McpError) { if (error instanceof McpError) {
throw error; throw error;
@@ -179,6 +241,10 @@ export async function handleAnalyzeImage(
}, },
], ],
isError: true, isError: true,
metadata: {
error_type: error instanceof Error ? error.constructor.name : 'Unknown',
error_message: error instanceof Error ? error.message : String(error)
}
}; };
} }
} }

View File

@@ -2,6 +2,26 @@ import fetch from 'node-fetch';
import sharp from 'sharp'; import sharp from 'sharp';
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js'; import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import OpenAI from 'openai'; import OpenAI from 'openai';
import path from 'path';
import { promises as fs } from 'fs';
import { tmpdir } from 'os';
// The 'uuid' dependency was removed; generateRandomId() below is a lightweight replacement.
// import { v4 as uuidv4 } from 'uuid';
// Default model for image analysis
const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
// Image processing constants
const MAX_DIMENSION = 800;
const JPEG_QUALITY = 80;
const MAX_RETRY_ATTEMPTS = 3;
const RETRY_DELAY = 1000; // ms
// Simple random ID generator to replace uuid
function generateRandomId(): string {
return Math.random().toString(36).substring(2, 15) +
Math.random().toString(36).substring(2, 15);
}
export interface MultiImageAnalysisToolRequest { export interface MultiImageAnalysisToolRequest {
images: Array<{ images: Array<{
@@ -13,45 +33,139 @@ export interface MultiImageAnalysisToolRequest {
model?: string; model?: string;
} }
/**
* Sleep function for retry mechanisms
*/
const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
/**
* Get MIME type from file extension or data URL
*/
function getMimeType(url: string): string {
if (url.startsWith('data:')) {
const match = url.match(/^data:([^;]+);/);
return match ? match[1] : 'application/octet-stream';
}
const extension = path.extname(url.split('?')[0]).toLowerCase();
switch (extension) {
case '.png': return 'image/png';
case '.jpg':
case '.jpeg': return 'image/jpeg';
case '.webp': return 'image/webp';
case '.gif': return 'image/gif';
case '.bmp': return 'image/bmp';
case '.svg': return 'image/svg+xml';
default: return 'application/octet-stream';
}
}
/**
* Fetch image from various sources: data URLs, file paths, or remote URLs
*/
async function fetchImageAsBuffer(url: string): Promise<Buffer> { async function fetchImageAsBuffer(url: string): Promise<Buffer> {
try { try {
// Handle data URLs // Handle data URLs
if (url.startsWith('data:')) { if (url.startsWith('data:')) {
const matches = url.match(/^data:([A-Za-z-+\/]+);base64,(.+)$/); const matches = url.match(/^data:([A-Za-z-+\/]+);base64,(.+)$/);
if (!matches || matches.length !== 3) { if (!matches || matches.length !== 3) {
throw new Error('Invalid data URL'); throw new Error('Invalid data URL format');
} }
return Buffer.from(matches[2], 'base64'); return Buffer.from(matches[2], 'base64');
} }
// Handle file URLs // Handle file URLs with file:// protocol
if (url.startsWith('file://')) { if (url.startsWith('file://')) {
const filePath = url.replace('file://', ''); const filePath = url.replace('file://', '');
const fs = await import('fs/promises'); try {
return await fs.readFile(filePath); return await fs.readFile(filePath);
} catch (error) {
console.error(`Error reading file at ${filePath}:`, error);
throw new Error(`Failed to read file: ${filePath}`);
}
}
// Handle absolute and relative file paths
if (url.startsWith('/') || url.startsWith('./') || url.startsWith('../') || /^[A-Za-z]:\\/.test(url)) {
try {
return await fs.readFile(url);
} catch (error) {
console.error(`Error reading file at ${url}:`, error);
throw new Error(`Failed to read file: ${url}`);
}
} }
// Handle http/https URLs // Handle http/https URLs
const response = await fetch(url); if (url.startsWith('http://') || url.startsWith('https://')) {
for (let attempt = 0; attempt < MAX_RETRY_ATTEMPTS; attempt++) {
try {
// Use AbortController for timeout instead of timeout option
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 15000);
const response = await fetch(url, {
signal: controller.signal,
headers: {
'User-Agent': 'OpenRouter-MCP-Server/1.0'
}
});
// Clear the timeout to prevent memory leaks
clearTimeout(timeoutId);
if (!response.ok) { if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`); throw new Error(`HTTP error! status: ${response.status}`);
} }
return Buffer.from(await response.arrayBuffer()); return Buffer.from(await response.arrayBuffer());
} catch (error) {
console.error(`Error fetching URL (attempt ${attempt + 1}/${MAX_RETRY_ATTEMPTS}): ${url}`, error);
if (attempt < MAX_RETRY_ATTEMPTS - 1) {
// Exponential backoff with jitter
const delay = RETRY_DELAY * Math.pow(2, attempt) * (0.5 + Math.random() * 0.5);
await sleep(delay);
} else {
throw error;
}
}
}
}
// If we get here, the URL format is unsupported
throw new Error(`Unsupported URL format: ${url}`);
} catch (error) { } catch (error) {
console.error(`Error fetching image from ${url}:`, error); console.error(`Error fetching image from ${url}:`, error);
throw error; throw error;
} }
// Unreachable fallback return — present only to satisfy the compiler's control-flow analysis.
return Buffer.from([]);
} }
async function processImage(buffer: Buffer): Promise<string> { /**
* Process and optimize image for API consumption
*/
async function processImage(buffer: Buffer, mimeType: string): Promise<string> {
try { try {
// Get image metadata // Create a temporary directory for processing if needed
const metadata = await sharp(buffer).metadata(); const tempDir = path.join(tmpdir(), `openrouter-mcp-${generateRandomId()}`);
await fs.mkdir(tempDir, { recursive: true });
// Calculate dimensions to keep base64 size reasonable // Get image info
const MAX_DIMENSION = 800; let sharpInstance = sharp(buffer);
const JPEG_QUALITY = 80; const metadata = await sharpInstance.metadata();
// Skip processing for small images
if (metadata.width && metadata.height &&
metadata.width <= MAX_DIMENSION &&
metadata.height <= MAX_DIMENSION &&
(mimeType === 'image/jpeg' || mimeType === 'image/webp')) {
return buffer.toString('base64');
}
// Resize larger images
if (metadata.width && metadata.height) { if (metadata.width && metadata.height) {
const largerDimension = Math.max(metadata.width, metadata.height); const largerDimension = Math.max(metadata.width, metadata.height);
if (largerDimension > MAX_DIMENSION) { if (largerDimension > MAX_DIMENSION) {
@@ -59,44 +173,65 @@ async function processImage(buffer: Buffer): Promise<string> {
? { width: MAX_DIMENSION } ? { width: MAX_DIMENSION }
: { height: MAX_DIMENSION }; : { height: MAX_DIMENSION };
const resizedBuffer = await sharp(buffer) sharpInstance = sharpInstance.resize(resizeOptions);
.resize(resizeOptions)
.jpeg({ quality: JPEG_QUALITY })
.toBuffer();
return resizedBuffer.toString('base64');
} }
} }
// If no resizing needed, just convert to JPEG // Convert to JPEG for consistency and small size
const jpegBuffer = await sharp(buffer) const processedBuffer = await sharpInstance
.jpeg({ quality: JPEG_QUALITY }) .jpeg({ quality: JPEG_QUALITY })
.toBuffer(); .toBuffer();
return jpegBuffer.toString('base64'); return processedBuffer.toString('base64');
} catch (error) { } catch (error) {
console.error('Error processing image:', error); console.error('Error processing image:', error);
throw error;
// If sharp processing fails, return the original buffer
// This is a fallback to ensure we don't completely fail on processing errors
console.error('Returning original image without processing');
return buffer.toString('base64');
} }
} }
// Find a suitable free model with vision capabilities /**
async function findSuitableFreeModel(openai: OpenAI): Promise<string> { * Find a suitable free model with vision capabilities, defaulting to Qwen
*/
export async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
try { try {
// Query available models with 'free' in their name // First try with an exact match for our preferred model
const modelsResponse = await openai.models.list(); const preferredModel = DEFAULT_FREE_MODEL;
if (!modelsResponse || !modelsResponse.data || modelsResponse.data.length === 0) {
return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback to a known model try {
// Check if our preferred model is available
const modelInfo = await openai.models.retrieve(preferredModel);
if (modelInfo && modelInfo.id) {
console.error(`Using preferred model: ${preferredModel}`);
return preferredModel;
}
} catch (error) {
console.error(`Preferred model ${preferredModel} not available, searching for alternatives...`);
} }
// Filter models with 'free' in ID and multimodal capabilities // Query available models
const freeModels = modelsResponse.data const modelsResponse = await openai.models.list();
.filter(model => model.id.includes('free')) if (!modelsResponse?.data || modelsResponse.data.length === 0) {
console.error('No models found, using default fallback model');
return DEFAULT_FREE_MODEL;
}
// First, try to find free vision models
const freeVisionModels = modelsResponse.data
.filter(model => {
const modelId = model.id.toLowerCase();
return modelId.includes('free') &&
(modelId.includes('vl') || modelId.includes('vision') || modelId.includes('claude') ||
modelId.includes('gemini') || modelId.includes('gpt-4') || modelId.includes('qwen'));
})
.map(model => { .map(model => {
// Try to extract context length from the model object // Extract context length if available
let contextLength = 0; let contextLength = 0;
try { try {
const modelAny = model as any; // Cast to any to access non-standard properties const modelAny = model as any;
if (typeof modelAny.context_length === 'number') { if (typeof modelAny.context_length === 'number') {
contextLength = modelAny.context_length; contextLength = modelAny.context_length;
} else if (modelAny.context_window) { } else if (modelAny.context_window) {
@@ -112,21 +247,26 @@ async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
}; };
}); });
if (freeModels.length === 0) { if (freeVisionModels.length > 0) {
return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback if no free models found
}
// Sort by context length and pick the one with the largest context window // Sort by context length and pick the one with the largest context window
freeModels.sort((a, b) => b.contextLength - a.contextLength); freeVisionModels.sort((a, b) => b.contextLength - a.contextLength);
console.error(`Selected free model: ${freeModels[0].id} with context length: ${freeModels[0].contextLength}`); const selectedModel = freeVisionModels[0].id;
console.error(`Selected free vision model: ${selectedModel} with context length: ${freeVisionModels[0].contextLength}`);
return selectedModel;
}
return freeModels[0].id; // If no free vision models found, fallback to our default
console.error('No free vision models found, using default fallback model');
return DEFAULT_FREE_MODEL;
} catch (error) { } catch (error) {
console.error('Error finding suitable free model:', error); console.error('Error finding suitable model:', error);
return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback to a known model return DEFAULT_FREE_MODEL;
} }
} }
/**
* Process and analyze multiple images using OpenRouter
*/
export async function handleMultiImageAnalysis( export async function handleMultiImageAnalysis(
request: { params: { arguments: MultiImageAnalysisToolRequest } }, request: { params: { arguments: MultiImageAnalysisToolRequest } },
openai: OpenAI, openai: OpenAI,
@@ -150,43 +290,65 @@ export async function handleMultiImageAnalysis(
text: args.prompt text: args.prompt
}]; }];
// Track successful and failed images for reporting
const successfulImages = [];
const failedImages = [];
// Process each image // Process each image
for (const image of args.images) { for (const [index, image] of args.images.entries()) {
try { try {
console.error(`Processing image ${index + 1}/${args.images.length}: ${image.url.substring(0, 50)}...`);
// Get MIME type
const mimeType = getMimeType(image.url);
// Fetch and process the image // Fetch and process the image
const imageBuffer = await fetchImageAsBuffer(image.url); const imageBuffer = await fetchImageAsBuffer(image.url);
const base64Image = await processImage(imageBuffer); const base64Image = await processImage(imageBuffer, mimeType);
// Use JPEG as the output format for consistency
const outputMimeType = 'image/jpeg';
// Add to content // Add to content
content.push({ content.push({
type: 'image_url', type: 'image_url',
image_url: { image_url: {
url: `data:image/jpeg;base64,${base64Image}` url: `data:${outputMimeType};base64,${base64Image}`
} }
}); });
successfulImages.push(image.url);
} catch (error) { } catch (error) {
console.error(`Error processing image ${image.url}:`, error); console.error(`Error processing image ${index + 1} (${image.url.substring(0, 30)}...):`, error);
failedImages.push({url: image.url, error: error instanceof Error ? error.message : String(error)});
// Continue with other images if one fails // Continue with other images if one fails
} }
} }
// If no images were successfully processed // If no images were successfully processed
if (content.length === 1) { if (content.length === 1) {
throw new Error('Failed to process any of the provided images'); const errorDetails = failedImages.map(img => `${img.url.substring(0, 30)}...: ${img.error}`).join('; ');
throw new Error(`Failed to process any of the provided images. Errors: ${errorDetails}`);
} }
// Select model with priority: // Select model with priority:
// 1. User-specified model // 1. User-specified model
// 2. Default model from environment // 2. Default model from environment
// 3. Free model with vision capabilities (selected automatically) // 3. Default free vision model (qwen/qwen2.5-vl-32b-instruct:free)
let model = args.model || defaultModel; let model = args.model || defaultModel || DEFAULT_FREE_MODEL;
if (!model) { // If a model is specified but not our default free model, verify it exists
if (model !== DEFAULT_FREE_MODEL) {
try {
await openai.models.retrieve(model);
} catch (error) {
console.error(`Specified model ${model} not found, falling back to auto-selection`);
model = await findSuitableFreeModel(openai); model = await findSuitableFreeModel(openai);
console.error(`Using auto-selected model: ${model}`); }
} }
console.error(`Making API call with model: ${model}`); console.error(`Making API call with model: ${model}`);
console.error(`Successfully processed ${successfulImages.length} images, ${failedImages.length} failed`);
// Make the API call // Make the API call
const completion = await openai.chat.completions.create({ const completion = await openai.chat.completions.create({
@@ -197,13 +359,32 @@ export async function handleMultiImageAnalysis(
}] as any }] as any
}); });
// Format the response
let responseText = completion.choices[0].message.content || '';
// Add information about failed images if any
if (failedImages.length > 0) {
const formattedErrors = args.markdown_response !== false
? `\n\n---\n\n**Note:** ${failedImages.length} image(s) could not be processed:\n${failedImages.map((img, i) => `- Image ${i+1}: ${img.error}`).join('\n')}`
: `\n\nNote: ${failedImages.length} image(s) could not be processed: ${failedImages.map((img, i) => `Image ${i+1}: ${img.error}`).join('; ')}`;
responseText += formattedErrors;
}
// Return the analysis result
return { return {
content: [ content: [
{ {
type: 'text', type: 'text',
text: completion.choices[0].message.content || '', text: responseText,
}, },
], ],
metadata: {
model: completion.model,
usage: completion.usage,
successful_images: successfulImages.length,
failed_images: failedImages.length
}
}; };
} catch (error) { } catch (error) {
console.error('Error in multi-image analysis:', error); console.error('Error in multi-image analysis:', error);
@@ -220,6 +401,10 @@ export async function handleMultiImageAnalysis(
}, },
], ],
isError: true, isError: true,
metadata: {
error_type: error instanceof Error ? error.constructor.name : 'Unknown',
error_message: error instanceof Error ? error.message : String(error)
}
}; };
} }
} }