Enhanced image analysis capabilities with improved handlers and default model setup
@@ -5,14 +5,17 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
 
 import { ToolHandlers } from './tool-handlers.js';
 
+// Define the default model to use when none is specified
+const DEFAULT_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
+
 class OpenRouterMultimodalServer {
   private server: Server;
   private toolHandlers!: ToolHandlers; // Using definite assignment assertion
 
   constructor() {
-    // Get API key and default model from environment variables
+    // Retrieve API key and default model from environment variables
     const apiKey = process.env.OPENROUTER_API_KEY;
-    const defaultModel = process.env.OPENROUTER_DEFAULT_MODEL;
+    const defaultModel = process.env.OPENROUTER_DEFAULT_MODEL || DEFAULT_MODEL;
 
     // Check if API key is provided
     if (!apiKey) {
@@ -55,11 +58,10 @@ class OpenRouterMultimodalServer {
     console.error('Using API key from environment variable');
     console.error('Note: To use OpenRouter Multimodal, add the API key to your environment variables:');
     console.error('  OPENROUTER_API_KEY=your-api-key');
-    if (process.env.OPENROUTER_DEFAULT_MODEL) {
-      console.error(`  Using default model: ${process.env.OPENROUTER_DEFAULT_MODEL}`);
-    } else {
-      console.error('  No default model set. You will need to specify a model in each request.');
-    }
+
+    const modelDisplay = process.env.OPENROUTER_DEFAULT_MODEL || DEFAULT_MODEL;
+    console.error(`  Using default model: ${modelDisplay}`);
     console.error('Server is ready to process tool calls. Waiting for input...');
   }
 }
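The net effect of these two src/index.ts hunks is that the server always resolves to a usable default model: when OPENROUTER_DEFAULT_MODEL is unset, both the configured default and the startup banner fall back to the hard-coded free Qwen model. A standalone sketch of that resolution (illustrative, not part of the commit):

// Illustrative sketch of the fallback chain introduced above
const DEFAULT_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
const resolved = process.env.OPENROUTER_DEFAULT_MODEL || DEFAULT_MODEL;
console.error(`  Using default model: ${resolved}`);
// With the variable unset, this prints the Qwen free model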
@@ -16,6 +16,7 @@ import { handleSearchModels, SearchModelsToolRequest } from './tool-handlers/sea
 import { handleGetModelInfo, GetModelInfoToolRequest } from './tool-handlers/get-model-info.js';
 import { handleValidateModel, ValidateModelToolRequest } from './tool-handlers/validate-model.js';
 import { handleMultiImageAnalysis, MultiImageAnalysisToolRequest } from './tool-handlers/multi-image-analysis.js';
+import { handleAnalyzeImage, AnalyzeImageToolRequest } from './tool-handlers/analyze-image.js';
 
 export class ToolHandlers {
   private server: Server;
@@ -51,7 +52,7 @@ export class ToolHandlers {
       tools: [
         // Chat Completion Tool
         {
-          name: 'chat_completion',
+          name: 'mcp_openrouter_chat_completion',
           description: 'Send a message to OpenRouter.ai and get a response',
           inputSchema: {
             type: 'object',
@@ -127,10 +128,34 @@ export class ToolHandlers {
           maxContextTokens: 200000
         },
 
+        // Single Image Analysis Tool
+        {
+          name: 'mcp_openrouter_analyze_image',
+          description: 'Analyze an image using OpenRouter vision models',
+          inputSchema: {
+            type: 'object',
+            properties: {
+              image_path: {
+                type: 'string',
+                description: 'Path to the image file to analyze (must be an absolute path)',
+              },
+              question: {
+                type: 'string',
+                description: 'Question to ask about the image',
+              },
+              model: {
+                type: 'string',
+                description: 'OpenRouter model to use (e.g., "anthropic/claude-3.5-sonnet")',
+              },
+            },
+            required: ['image_path'],
+          },
+        },
+
         // Multi-Image Analysis Tool
         {
-          name: 'multi_image_analysis',
-          description: 'Analyze one or more images with a prompt and receive detailed responses',
+          name: 'mcp_openrouter_multi_image_analysis',
+          description: 'Analyze multiple images at once with a single prompt and receive detailed responses',
           inputSchema: {
             type: 'object',
             properties: {
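Only image_path is required by the schema above; question and model are optional at the schema level, although the rewritten handler (further down) rejects calls without a question. A hypothetical arguments object for the new tool:

// Hypothetical arguments for mcp_openrouter_analyze_image
// (the file path is an example, not from the commit)
const analyzeImageArgs = {
  image_path: '/home/user/photos/receipt.png', // must be absolute
  question: 'What is the total on this receipt?',
  // model omitted: the server falls back to its default model
};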
@@ -269,14 +294,21 @@ export class ToolHandlers {
 
     this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
       switch (request.params.name) {
-        case 'chat_completion':
+        case 'mcp_openrouter_chat_completion':
           return handleChatCompletion({
             params: {
               arguments: request.params.arguments as unknown as ChatCompletionToolRequest
             }
           }, this.openai, this.defaultModel);
 
-        case 'multi_image_analysis':
+        case 'mcp_openrouter_analyze_image':
+          return handleAnalyzeImage({
+            params: {
+              arguments: request.params.arguments as unknown as AnalyzeImageToolRequest
+            }
+          }, this.openai, this.defaultModel);
+
+        case 'mcp_openrouter_multi_image_analysis':
           return handleMultiImageAnalysis({
             params: {
               arguments: request.params.arguments as unknown as MultiImageAnalysisToolRequest
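Because the switch now matches only the mcp_openrouter_-prefixed names, clients that still send chat_completion or multi_image_analysis no longer hit any case: the rename is a breaking change for existing callers. A sketch of a call using the new name, assuming a connected Client from the MCP TypeScript SDK:

// Assumes `client` is an already-connected MCP SDK Client instance
const result = await client.callTool({
  name: 'mcp_openrouter_analyze_image',
  arguments: {
    image_path: '/home/user/photos/receipt.png', // example path
    question: 'Describe this image',
  },
});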
@@ -4,10 +4,13 @@ import sharp from 'sharp';
 import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
 import OpenAI from 'openai';
 import fetch from 'node-fetch';
+import { findSuitableFreeModel } from './multi-image-analysis.js';
 
+// Default model for image analysis
+const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
+
 export interface AnalyzeImageToolRequest {
-  image_path?: string;
-  image_url?: string;
+  image_path: string;
   question?: string;
   model?: string;
 }
@@ -83,6 +86,70 @@ async function processImage(buffer: Buffer): Promise<string> {
   }
 }
 
+/**
+ * Converts the image at the given path to a base64 string
+ */
+async function imageToBase64(imagePath: string): Promise<{ base64: string; mimeType: string }> {
+  try {
+    // Ensure the image path is absolute
+    if (!path.isAbsolute(imagePath)) {
+      throw new McpError(
+        ErrorCode.InvalidParams,
+        'Image path must be absolute'
+      );
+    }
+
+    // Check if the file exists
+    try {
+      await fs.access(imagePath);
+    } catch (error) {
+      throw new McpError(
+        ErrorCode.InvalidParams,
+        `File not found: ${imagePath}`
+      );
+    }
+
+    // Read the file as a buffer
+    const buffer = await fs.readFile(imagePath);
+
+    // Determine MIME type from file extension
+    const extension = path.extname(imagePath).toLowerCase();
+    let mimeType: string;
+
+    switch (extension) {
+      case '.png':
+        mimeType = 'image/png';
+        break;
+      case '.jpg':
+      case '.jpeg':
+        mimeType = 'image/jpeg';
+        break;
+      case '.webp':
+        mimeType = 'image/webp';
+        break;
+      case '.gif':
+        mimeType = 'image/gif';
+        break;
+      case '.bmp':
+        mimeType = 'image/bmp';
+        break;
+      default:
+        mimeType = 'application/octet-stream';
+    }
+
+    // Convert buffer to base64
+    const base64 = buffer.toString('base64');
+
+    return { base64, mimeType };
+  } catch (error) {
+    console.error('Error converting image to base64:', error);
+    throw error;
+  }
+}
+
 /**
  * Handler for analyzing a single image
  */
 export async function handleAnalyzeImage(
   request: { params: { arguments: AnalyzeImageToolRequest } },
   openai: OpenAI,
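The { base64, mimeType } pair returned by imageToBase64 is what lets the handler below build a correctly typed data URL rather than hard-coding image/jpeg. A brief usage sketch (the path is an example):

const { base64, mimeType } = await imageToBase64('/tmp/chart.png');
const dataUrl = `data:${mimeType};base64,${base64}`;
// For a PNG this yields 'data:image/png;base64,iVBORw0KGgo...'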
@@ -91,71 +158,62 @@ export async function handleAnalyzeImage(
   const args = request.params.arguments;
 
   try {
-    // Validate image source
-    const imagePath = args.image_path;
-    const imageUrl = args.image_url;
-
-    if (!imagePath && !imageUrl) {
-      throw new McpError(ErrorCode.InvalidParams, 'Either image_path or image_url must be provided');
+    // Validate inputs
+    if (!args.image_path) {
+      throw new McpError(ErrorCode.InvalidParams, 'An image path is required');
     }
 
-    // Normalize the path/url
-    let imageSource: string;
-
-    if (imageUrl) {
-      // Use the provided URL directly
-      imageSource = imageUrl;
-    } else if (imagePath) {
-      // For backward compatibility, try to handle the image_path
-      if (path.isAbsolute(imagePath)) {
-        // For absolute paths, use as a local file path
-        imageSource = imagePath;
-      } else {
-        // For relative paths, show a better error message
-        throw new McpError(ErrorCode.InvalidParams, 'Image path must be absolute or use image_url with file:// prefix');
-      }
-    } else {
-      // This shouldn't happen due to the check above, but TypeScript doesn't know that
-      throw new McpError(ErrorCode.InvalidParams, 'No image source provided');
+    if (!args.question) {
+      throw new McpError(ErrorCode.InvalidParams, 'A question about the image is required');
     }
 
-    // Fetch and process the image
-    const imageBuffer = await fetchImageAsBuffer(imageSource);
-    console.error(`Successfully read image buffer of size: ${imageBuffer.length}`);
+    console.error(`Processing image: ${args.image_path}`);
 
-    // Process the image (resize if needed)
-    const base64Image = await processImage(imageBuffer);
+    // Convert the image to base64
+    const { base64, mimeType } = await imageToBase64(args.image_path);
 
-    // Select model
-    const model = args.model || defaultModel || 'anthropic/claude-3.5-sonnet';
-
-    // Prepare message with image
-    const messages = [
-      {
-        role: 'user',
-        content: [
-          {
-            type: 'text',
-            text: args.question || "What's in this image?"
-          },
-          {
-            type: 'image_url',
-            image_url: {
-              url: `data:image/jpeg;base64,${base64Image}`
-            }
-          }
-        ]
-      }
-    ];
+    // Create the content array for the OpenAI API
+    const content = [
+      {
+        type: 'text',
+        text: args.question
+      },
+      {
+        type: 'image_url',
+        image_url: {
+          url: `data:${mimeType};base64,${base64}`
+        }
+      }
+    ];
 
-    console.error('Sending request to OpenRouter...');
+    // Select model with priority:
+    // 1. User-specified model
+    // 2. Default model from environment
+    // 3. Default free vision model (qwen/qwen2.5-vl-32b-instruct:free)
+    let model = args.model || defaultModel || DEFAULT_FREE_MODEL;
 
-    // Call OpenRouter API
+    // If a model is specified but not our default free model, verify it exists
+    if (model !== DEFAULT_FREE_MODEL) {
+      try {
+        await openai.models.retrieve(model);
+      } catch (error) {
+        console.error(`Specified model ${model} not found, falling back to auto-selection`);
+        model = await findSuitableFreeModel(openai);
+      }
+    }
+
+    console.error(`Making API call with model: ${model}`);
+
+    // Make the API call
     const completion = await openai.chat.completions.create({
       model,
-      messages: messages as any,
+      messages: [{
+        role: 'user',
+        content
+      }] as any
     });
 
+    // Return the analysis result
     return {
       content: [
         {
@@ -163,9 +221,13 @@ export async function handleAnalyzeImage(
           text: completion.choices[0].message.content || '',
         },
       ],
+      metadata: {
+        model: completion.model,
+        usage: completion.usage
+      }
     };
   } catch (error) {
-    console.error('Error analyzing image:', error);
+    console.error('Error in image analysis:', error);
 
     if (error instanceof McpError) {
       throw error;
@@ -179,6 +241,10 @@ export async function handleAnalyzeImage(
         },
       ],
       isError: true,
+      metadata: {
+        error_type: error instanceof Error ? error.constructor.name : 'Unknown',
+        error_message: error instanceof Error ? error.message : String(error)
+      }
     };
   }
 }
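Both the success and error paths now carry a metadata object next to content, so callers can inspect the model used, token usage, or structured error details. Roughly, the handler's return value fits a shape like this (an illustrative type; the repo does not declare it explicitly):

interface AnalyzeImageResult {
  content: Array<{ type: 'text'; text: string }>;
  isError?: boolean;
  metadata?: {
    model?: string;          // echoed from the completion on success
    usage?: unknown;         // token usage as reported by OpenRouter
    error_type?: string;     // populated on the error path
    error_message?: string;
  };
}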
@@ -2,6 +2,26 @@ import fetch from 'node-fetch';
 import sharp from 'sharp';
 import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
 import OpenAI from 'openai';
+import path from 'path';
+import { promises as fs } from 'fs';
+import { tmpdir } from 'os';
+// Remove uuid import as we'll use a simple random string generator instead
+// import { v4 as uuidv4 } from 'uuid';
+
+// Default model for image analysis
+const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
+
+// Image processing constants
+const MAX_DIMENSION = 800;
+const JPEG_QUALITY = 80;
+const MAX_RETRY_ATTEMPTS = 3;
+const RETRY_DELAY = 1000; // ms
+
+// Simple random ID generator to replace uuid
+function generateRandomId(): string {
+  return Math.random().toString(36).substring(2, 15) +
+         Math.random().toString(36).substring(2, 15);
+}
 
 export interface MultiImageAnalysisToolRequest {
   images: Array<{
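Replacing uuid with Math.random() keeps the dependency tree smaller and is adequate for naming throwaway temp directories, though the IDs are not collision-resistant. If stronger uniqueness were ever needed, Node's built-in generator would avoid the extra dependency just as well (a sketch, assuming Node 14.17 or later):

import { randomUUID } from 'crypto';

// Same role as generateRandomId(), but backed by a CSPRNG
const tempDirName = `openrouter-mcp-${randomUUID()}`;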
@@ -13,45 +33,139 @@ export interface MultiImageAnalysisToolRequest {
   model?: string;
 }
 
+/**
+ * Sleep function for retry mechanisms
+ */
+const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
+
+/**
+ * Get MIME type from file extension or data URL
+ */
+function getMimeType(url: string): string {
+  if (url.startsWith('data:')) {
+    const match = url.match(/^data:([^;]+);/);
+    return match ? match[1] : 'application/octet-stream';
+  }
+
+  const extension = path.extname(url.split('?')[0]).toLowerCase();
+
+  switch (extension) {
+    case '.png': return 'image/png';
+    case '.jpg':
+    case '.jpeg': return 'image/jpeg';
+    case '.webp': return 'image/webp';
+    case '.gif': return 'image/gif';
+    case '.bmp': return 'image/bmp';
+    case '.svg': return 'image/svg+xml';
+    default: return 'application/octet-stream';
+  }
+}
+
+/**
+ * Fetch image from various sources: data URLs, file paths, or remote URLs
+ */
 async function fetchImageAsBuffer(url: string): Promise<Buffer> {
   try {
     // Handle data URLs
     if (url.startsWith('data:')) {
       const matches = url.match(/^data:([A-Za-z-+\/]+);base64,(.+)$/);
       if (!matches || matches.length !== 3) {
-        throw new Error('Invalid data URL');
+        throw new Error('Invalid data URL format');
       }
       return Buffer.from(matches[2], 'base64');
     }
 
-    // Handle file URLs
+    // Handle file URLs with file:// protocol
     if (url.startsWith('file://')) {
       const filePath = url.replace('file://', '');
-      const fs = await import('fs/promises');
+      try {
+        return await fs.readFile(filePath);
+      } catch (error) {
+        console.error(`Error reading file at ${filePath}:`, error);
+        throw new Error(`Failed to read file: ${filePath}`);
+      }
     }
 
+    // Handle absolute and relative file paths
+    if (url.startsWith('/') || url.startsWith('./') || url.startsWith('../') || /^[A-Za-z]:\\/.test(url)) {
+      try {
+        return await fs.readFile(url);
+      } catch (error) {
+        console.error(`Error reading file at ${url}:`, error);
+        throw new Error(`Failed to read file: ${url}`);
+      }
+    }
+
     // Handle http/https URLs
-    const response = await fetch(url);
-    if (!response.ok) {
-      throw new Error(`HTTP error! status: ${response.status}`);
-    }
-    return Buffer.from(await response.arrayBuffer());
+    if (url.startsWith('http://') || url.startsWith('https://')) {
+      for (let attempt = 0; attempt < MAX_RETRY_ATTEMPTS; attempt++) {
+        try {
+          // Use AbortController for timeout instead of timeout option
+          const controller = new AbortController();
+          const timeoutId = setTimeout(() => controller.abort(), 15000);
+
+          const response = await fetch(url, {
+            signal: controller.signal,
+            headers: {
+              'User-Agent': 'OpenRouter-MCP-Server/1.0'
+            }
+          });
+
+          // Clear the timeout to prevent memory leaks
+          clearTimeout(timeoutId);
+
+          if (!response.ok) {
+            throw new Error(`HTTP error! status: ${response.status}`);
+          }
+
+          return Buffer.from(await response.arrayBuffer());
+        } catch (error) {
+          console.error(`Error fetching URL (attempt ${attempt + 1}/${MAX_RETRY_ATTEMPTS}): ${url}`, error);
+
+          if (attempt < MAX_RETRY_ATTEMPTS - 1) {
+            // Exponential backoff with jitter
+            const delay = RETRY_DELAY * Math.pow(2, attempt) * (0.5 + Math.random() * 0.5);
+            await sleep(delay);
+          } else {
+            throw error;
+          }
+        }
+      }
+    }
+
+    // If we get here, the URL format is unsupported
+    throw new Error(`Unsupported URL format: ${url}`);
   } catch (error) {
     console.error(`Error fetching image from ${url}:`, error);
     throw error;
   }
+
+  // TypeScript requires a return statement here, but this is unreachable
+  return Buffer.from([]);
 }
 
-async function processImage(buffer: Buffer): Promise<string> {
+/**
+ * Process and optimize image for API consumption
+ */
+async function processImage(buffer: Buffer, mimeType: string): Promise<string> {
   try {
-    // Get image metadata
-    const metadata = await sharp(buffer).metadata();
+    // Create a temporary directory for processing if needed
+    const tempDir = path.join(tmpdir(), `openrouter-mcp-${generateRandomId()}`);
+    await fs.mkdir(tempDir, { recursive: true });
 
-    // Calculate dimensions to keep base64 size reasonable
-    const MAX_DIMENSION = 800;
-    const JPEG_QUALITY = 80;
+    // Get image info
+    let sharpInstance = sharp(buffer);
+    const metadata = await sharpInstance.metadata();
+
+    // Skip processing for small images
+    if (metadata.width && metadata.height &&
+        metadata.width <= MAX_DIMENSION &&
+        metadata.height <= MAX_DIMENSION &&
+        (mimeType === 'image/jpeg' || mimeType === 'image/webp')) {
+      return buffer.toString('base64');
+    }
 
+    // Resize larger images
     if (metadata.width && metadata.height) {
       const largerDimension = Math.max(metadata.width, metadata.height);
       if (largerDimension > MAX_DIMENSION) {
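The retry loop above waits RETRY_DELAY * 2^attempt * (0.5 + random * 0.5) milliseconds between failed attempts: exponential backoff with up to 50% downward jitter, and no wait after the final attempt. With the constants defined in this file, the delay windows work out as follows (standalone sketch):

// Delay ranges produced by the backoff formula, with RETRY_DELAY = 1000
const RETRY_DELAY = 1000;
for (let attempt = 0; attempt < 2; attempt++) { // only failed non-final attempts sleep
  const min = RETRY_DELAY * Math.pow(2, attempt) * 0.5;
  const max = RETRY_DELAY * Math.pow(2, attempt); // exclusive upper bound
  console.log(`retry ${attempt + 1}: wait ${min}-${max} ms`);
}
// retry 1: wait 500-1000 ms
// retry 2: wait 1000-2000 ms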
@@ -59,44 +173,65 @@ async function processImage(buffer: Buffer): Promise<string> {
           ? { width: MAX_DIMENSION }
           : { height: MAX_DIMENSION };
 
-        const resizedBuffer = await sharp(buffer)
-          .resize(resizeOptions)
-          .jpeg({ quality: JPEG_QUALITY })
-          .toBuffer();
-
-        return resizedBuffer.toString('base64');
+        sharpInstance = sharpInstance.resize(resizeOptions);
       }
     }
 
-    // If no resizing needed, just convert to JPEG
-    const jpegBuffer = await sharp(buffer)
+    // Convert to JPEG for consistency and small size
+    const processedBuffer = await sharpInstance
       .jpeg({ quality: JPEG_QUALITY })
       .toBuffer();
 
-    return jpegBuffer.toString('base64');
+    return processedBuffer.toString('base64');
   } catch (error) {
     console.error('Error processing image:', error);
-    throw error;
+
+    // If sharp processing fails, return the original buffer
+    // This is a fallback to ensure we don't completely fail on processing errors
+    console.error('Returning original image without processing');
+    return buffer.toString('base64');
   }
 }
 
-// Find a suitable free model with vision capabilities
-async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
+/**
+ * Find a suitable free model with vision capabilities, defaulting to Qwen
+ */
+export async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
   try {
-    // Query available models with 'free' in their name
-    const modelsResponse = await openai.models.list();
-    if (!modelsResponse || !modelsResponse.data || modelsResponse.data.length === 0) {
-      return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback to a known model
-    }
-
-    // Filter models with 'free' in ID and multimodal capabilities
-    const freeModels = modelsResponse.data
-      .filter(model => model.id.includes('free'))
+    // First try with an exact match for our preferred model
+    const preferredModel = DEFAULT_FREE_MODEL;
+
+    try {
+      // Check if our preferred model is available
+      const modelInfo = await openai.models.retrieve(preferredModel);
+      if (modelInfo && modelInfo.id) {
+        console.error(`Using preferred model: ${preferredModel}`);
+        return preferredModel;
+      }
+    } catch (error) {
+      console.error(`Preferred model ${preferredModel} not available, searching for alternatives...`);
+    }
+
+    // Query available models
+    const modelsResponse = await openai.models.list();
+    if (!modelsResponse?.data || modelsResponse.data.length === 0) {
+      console.error('No models found, using default fallback model');
+      return DEFAULT_FREE_MODEL;
+    }
+
+    // First, try to find free vision models
+    const freeVisionModels = modelsResponse.data
+      .filter(model => {
+        const modelId = model.id.toLowerCase();
+        return modelId.includes('free') &&
+               (modelId.includes('vl') || modelId.includes('vision') || modelId.includes('claude') ||
+                modelId.includes('gemini') || modelId.includes('gpt-4') || modelId.includes('qwen'));
+      })
       .map(model => {
-        // Try to extract context length from the model object
+        // Extract context length if available
         let contextLength = 0;
         try {
-          const modelAny = model as any; // Cast to any to access non-standard properties
+          const modelAny = model as any;
           if (typeof modelAny.context_length === 'number') {
             contextLength = modelAny.context_length;
           } else if (modelAny.context_window) {
@@ -112,21 +247,26 @@ async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
         };
       });
 
-    if (freeModels.length === 0) {
-      return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback if no free models found
-    }
-
-    // Sort by context length and pick the one with the largest context window
-    freeModels.sort((a, b) => b.contextLength - a.contextLength);
-    console.error(`Selected free model: ${freeModels[0].id} with context length: ${freeModels[0].contextLength}`);
-
-    return freeModels[0].id;
+    if (freeVisionModels.length > 0) {
+      // Sort by context length and pick the one with the largest context window
+      freeVisionModels.sort((a, b) => b.contextLength - a.contextLength);
+      const selectedModel = freeVisionModels[0].id;
+      console.error(`Selected free vision model: ${selectedModel} with context length: ${freeVisionModels[0].contextLength}`);
+      return selectedModel;
+    }
+
+    // If no free vision models found, fallback to our default
+    console.error('No free vision models found, using default fallback model');
+    return DEFAULT_FREE_MODEL;
   } catch (error) {
-    console.error('Error finding suitable free model:', error);
-    return 'qwen/qwen2.5-vl-32b-instruct:free'; // Fallback to a known model
+    console.error('Error finding suitable model:', error);
+    return DEFAULT_FREE_MODEL;
   }
 }
 
+/**
+ * Process and analyze multiple images using OpenRouter
+ */
 export async function handleMultiImageAnalysis(
   request: { params: { arguments: MultiImageAnalysisToolRequest } },
   openai: OpenAI,
@@ -150,43 +290,65 @@ export async function handleMultiImageAnalysis(
       text: args.prompt
     }];
 
+    // Track successful and failed images for reporting
+    const successfulImages = [];
+    const failedImages = [];
+
     // Process each image
-    for (const image of args.images) {
+    for (const [index, image] of args.images.entries()) {
       try {
+        console.error(`Processing image ${index + 1}/${args.images.length}: ${image.url.substring(0, 50)}...`);
+
+        // Get MIME type
+        const mimeType = getMimeType(image.url);
+
         // Fetch and process the image
         const imageBuffer = await fetchImageAsBuffer(image.url);
-        const base64Image = await processImage(imageBuffer);
+        const base64Image = await processImage(imageBuffer, mimeType);
+
+        // Use JPEG as the output format for consistency
+        const outputMimeType = 'image/jpeg';
 
         // Add to content
         content.push({
           type: 'image_url',
           image_url: {
-            url: `data:image/jpeg;base64,${base64Image}`
+            url: `data:${outputMimeType};base64,${base64Image}`
           }
         });
+
+        successfulImages.push(image.url);
       } catch (error) {
-        console.error(`Error processing image ${image.url}:`, error);
+        console.error(`Error processing image ${index + 1} (${image.url.substring(0, 30)}...):`, error);
+        failedImages.push({url: image.url, error: error instanceof Error ? error.message : String(error)});
         // Continue with other images if one fails
       }
     }
 
     // If no images were successfully processed
     if (content.length === 1) {
-      throw new Error('Failed to process any of the provided images');
+      const errorDetails = failedImages.map(img => `${img.url.substring(0, 30)}...: ${img.error}`).join('; ');
+      throw new Error(`Failed to process any of the provided images. Errors: ${errorDetails}`);
     }
 
     // Select model with priority:
     // 1. User-specified model
     // 2. Default model from environment
-    // 3. Free model with vision capabilities (selected automatically)
-    let model = args.model || defaultModel;
+    // 3. Default free vision model (qwen/qwen2.5-vl-32b-instruct:free)
+    let model = args.model || defaultModel || DEFAULT_FREE_MODEL;
 
-    if (!model) {
-      model = await findSuitableFreeModel(openai);
-      console.error(`Using auto-selected model: ${model}`);
+    // If a model is specified but not our default free model, verify it exists
+    if (model !== DEFAULT_FREE_MODEL) {
+      try {
+        await openai.models.retrieve(model);
+      } catch (error) {
+        console.error(`Specified model ${model} not found, falling back to auto-selection`);
+        model = await findSuitableFreeModel(openai);
+      }
     }
 
     console.error(`Making API call with model: ${model}`);
+    console.error(`Successfully processed ${successfulImages.length} images, ${failedImages.length} failed`);
 
     // Make the API call
     const completion = await openai.chat.completions.create({
@@ -197,13 +359,32 @@ export async function handleMultiImageAnalysis(
       }] as any
     });
 
+    // Format the response
+    let responseText = completion.choices[0].message.content || '';
+
+    // Add information about failed images if any
+    if (failedImages.length > 0) {
+      const formattedErrors = args.markdown_response !== false
+        ? `\n\n---\n\n**Note:** ${failedImages.length} image(s) could not be processed:\n${failedImages.map((img, i) => `- Image ${i+1}: ${img.error}`).join('\n')}`
+        : `\n\nNote: ${failedImages.length} image(s) could not be processed: ${failedImages.map((img, i) => `Image ${i+1}: ${img.error}`).join('; ')}`;
+
+      responseText += formattedErrors;
+    }
+
+    // Return the analysis result
     return {
       content: [
         {
           type: 'text',
-          text: completion.choices[0].message.content || '',
+          text: responseText,
         },
       ],
+      metadata: {
+        model: completion.model,
+        usage: completion.usage,
+        successful_images: successfulImages.length,
+        failed_images: failedImages.length
+      }
     };
   } catch (error) {
     console.error('Error in multi-image analysis:', error);
@@ -220,6 +401,10 @@ export async function handleMultiImageAnalysis(
         },
       ],
       isError: true,
+      metadata: {
+        error_type: error instanceof Error ? error.constructor.name : 'Unknown',
+        error_message: error instanceof Error ? error.message : String(error)
+      }
     };
   }
 }