Enhance MCP server (v1.2.0): Consolidate image analysis, add auto model selection

2025-03-27 13:42:13 +05:30
parent 74d2997547
commit 3f9840d884
5 changed files with 130 additions and 75 deletions
--- a/README.md
+++ b/README.md
@@ -192,22 +192,6 @@ use_mcp_tool({
 });
 ```
 ### analyze_image
 Analyze a single image with an optional question:
 ```javascript
 use_mcp_tool({
  server_name: "openrouter",
  tool_name: "analyze_image",
  arguments: {
    image_path: "/absolute/path/to/image.jpg",
    question: "What objects are in this image?", // Optional
    model: "anthropic/claude-3.5-sonnet" // Optional if default is set
  }
 });
 ```
 ### multi_image_analysis
 Analyze multiple images with a single prompt:
@@ -329,3 +313,10 @@ npm run build
 ## License
 MIT License
 ## Version 1.2.0 Updates
 - Simplified image analysis by consolidating all functionality into the `multi_image_analysis` tool
 - Added automatic selection of the free model with the largest context window when no model is specified and no default model is configured
 - Improved handling of various image formats (file://, http://, data:)
 - Enhanced error handling and logging for better troubleshooting
 - Removed the `analyze_image` tool to eliminate confusion and streamline the interface
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@stabgan/openrouter-mcp-multimodal",
-  "version": "1.1.0",
+  "version": "1.2.0",
  "description": "MCP server for OpenRouter providing text chat and image analysis tools",
  "type": "module",
  "main": "dist/index.js",
--- a/src/tool-handlers.ts
+++ b/src/tool-handlers.ts
@@ -15,7 +15,6 @@ import { handleChatCompletion, ChatCompletionToolRequest } from './tool-handlers
 import { handleSearchModels, SearchModelsToolRequest } from './tool-handlers/search-models.js';
 import { handleGetModelInfo, GetModelInfoToolRequest } from './tool-handlers/get-model-info.js';
 import { handleValidateModel, ValidateModelToolRequest } from './tool-handlers/validate-model.js';
 import { handleAnalyzeImage, AnalyzeImageToolRequest } from './tool-handlers/analyze-image.js';
 import { handleMultiImageAnalysis, MultiImageAnalysisToolRequest } from './tool-handlers/multi-image-analysis.js';
 export class ToolHandlers {
@@ -128,41 +127,10 @@ export class ToolHandlers {
          maxContextTokens: 200000
        },
        // Image Analysis Tool
        {
          name: 'analyze_image',
          description: 'Analyze an image using OpenRouter vision models',
          inputSchema: {
            type: 'object',
            properties: {
              image_path: {
                type: 'string',
                description: 'Path to the image file to analyze (must be an absolute path)',
              },
              image_url: {
                type: 'string',
                description: 'URL or data URL of the image (can be a file:// URL, http(s):// URL, or data: URI)',
              },
              question: {
                type: 'string',
                description: 'Question to ask about the image',
              },
              model: {
                type: 'string',
                description: 'OpenRouter model to use (e.g., "anthropic/claude-3.5-sonnet")',
              },
            },
            oneOf: [
              { required: ['image_path'] },
              { required: ['image_url'] }
            ]
          },
        },
        // Multi-Image Analysis Tool
        {
          name: 'multi_image_analysis',
-          description: 'Analyze multiple images at once with a single prompt and receive detailed responses',
+          description: 'Analyze one or more images with a prompt and receive detailed responses',
          inputSchema: {
            type: 'object',
            properties: {
@@ -174,7 +142,7 @@ export class ToolHandlers {
                  properties: {
                    url: {
                      type: 'string',
-                      description: 'URL or data URL of the image (can be a file:// URL to read from local filesystem)',
+                      description: 'URL or data URL of the image (use file:// URL prefix for local files, http(s):// for web images, or data: for base64 encoded images)',
                    },
                    alt: {
                      type: 'string',
@@ -195,7 +163,7 @@ export class ToolHandlers {
              },
              model: {
                type: 'string',
-                description: 'OpenRouter model to use (defaults to claude-3.5-sonnet if not specified)',
+                description: 'OpenRouter model to use. If not specified, the system will use a free model with vision capabilities or the default model.',
              },
            },
            required: ['images', 'prompt'],
@@ -308,13 +276,6 @@ export class ToolHandlers {
            }
          }, this.openai, this.defaultModel);
        case 'analyze_image':
          return handleAnalyzeImage({
            params: {
              arguments: request.params.arguments as unknown as AnalyzeImageToolRequest
            }
          }, this.openai, this.defaultModel);
        case 'multi_image_analysis':
          return handleMultiImageAnalysis({
            params: {
--- a/src/tool-handlers/chat-completion.ts
+++ b/src/tool-handlers/chat-completion.ts
@@ -66,6 +66,53 @@ function truncateMessagesToFit(
  return truncated;
 }
 /**
  * Choose a free model to use when no explicit or default model is available.
  *
  * Lists the models visible to `openai`, keeps those whose id contains "free",
  * and returns the id of the entry reporting the largest context size. When
  * several entries share the largest size, the one listed first is returned.
  *
  * @param openai - Client whose `models.list()` result is inspected.
  * @returns The chosen model id, or 'deepseek/deepseek-chat-v3-0324:free' when
  *   the listing is empty, contains no free model, or the lookup throws.
  */
 async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
   // Single source for the id returned on every failure path.
   const fallbackId = 'deepseek/deepseek-chat-v3-0324:free';
   try {
     const listing = await openai.models.list();
     const catalogue = listing && listing.data;
     if (!catalogue || catalogue.length === 0) {
       return fallbackId;
     }
     const candidates = catalogue.filter(entry => entry.id.includes('free'));
     // Track the best candidate seen so far; strict ">" keeps the earliest entry on ties.
     let winner: { id: string; contextLength: number } | undefined;
     for (const entry of candidates) {
       let reported = 0;
       try {
         // The context size is not part of the SDK's model type, so read it untyped.
         const extras = entry as any;
         if (typeof extras.context_length === 'number') {
           reported = extras.context_length;
         } else if (extras.context_window) {
           reported = parseInt(extras.context_window, 10);
         }
       } catch (e) {
         console.error(`Error parsing context length for model ${entry.id}:`, e);
       }
       // "|| 0" also normalises a NaN produced by parseInt.
       const contextLength = reported || 0;
       if (winner === undefined || contextLength > winner.contextLength) {
         winner = { id: entry.id, contextLength };
       }
     }
     if (winner === undefined) {
       return fallbackId;
     }
     console.error(`Selected free model: ${winner.id} with context length: ${winner.contextLength}`);
     return winner.id;
   } catch (error) {
     console.error('Error finding suitable free model:', error);
     return fallbackId;
   }
 }
 export async function handleChatCompletion(
  request: { params: { arguments: ChatCompletionToolRequest } },
  openai: OpenAI,
@@ -73,20 +120,6 @@ export async function handleChatCompletion(
 ) {
  const args = request.params.arguments;
  // Validate model selection
  const model = args.model || defaultModel;
  if (!model) {
    return {
      content: [
        {
          type: 'text',
          text: 'No model specified and no default model configured in MCP settings. Please specify a model or set OPENROUTER_DEFAULT_MODEL in the MCP configuration.',
        },
      ],
      isError: true,
    };
  }
  // Validate message array
  if (args.messages.length === 0) {
    return {
@@ -101,9 +134,22 @@ export async function handleChatCompletion(
  }
  try {
    // Select model with priority:
    // 1. User-specified model
    // 2. Default model from environment
    // 3. Free model with the largest context window (selected automatically)
    let model = args.model || defaultModel;
    if (!model) {
      model = await findSuitableFreeModel(openai);
      console.error(`Using auto-selected model: ${model}`);
    }
    // Truncate messages to fit within context window
    const truncatedMessages = truncateMessagesToFit(args.messages, MAX_CONTEXT_TOKENS);
    console.error(`Making API call with model: ${model}`);
    const completion = await openai.chat.completions.create({
      model,
      messages: truncatedMessages,
--- a/src/tool-handlers/multi-image-analysis.ts
+++ b/src/tool-handlers/multi-image-analysis.ts
@@ -80,6 +80,53 @@ async function processImage(buffer: Buffer): Promise<string> {
  }
 }
 /**
  * Choose a free model that accepts image input, for use when no explicit or
  * default model is available.
  *
  * Lists the models visible to `openai`, keeps those whose id contains "free"
  * AND whose `architecture` metadata lists image input, and returns the id of
  * the one reporting the largest context size (first listed wins a tie).
  * Filtering on image input matters because the free model with the largest
  * context may be text-only.
  *
  * NOTE(review): relies on the `architecture.input_modalities` array or the
  * `architecture.modality` string (e.g. "text+image->text") in OpenRouter's
  * model listing. Models carrying neither field are treated as not vision-capable.
  *
  * @param openai - Client whose `models.list()` result is inspected.
  * @returns The chosen model id, or 'qwen/qwen2.5-vl-32b-instruct:free' when the
  *   listing is empty, no free image-capable model is found, or the lookup throws.
  */
 async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
   // Known free vision model, used whenever discovery fails or yields nothing usable.
   const fallbackModel = 'qwen/qwen2.5-vl-32b-instruct:free';
   // Reports whether a model's `architecture` metadata lists image input.
   const acceptsImageInput = (model: unknown): boolean => {
     // Provider-specific metadata is not part of the SDK's model type, so read it untyped.
     const architecture = model ? (model as any).architecture : undefined;
     if (!architecture) {
       return false;
     }
     const inputs: unknown = architecture.input_modalities;
     if (Array.isArray(inputs) && inputs.includes('image')) {
       return true;
     }
     // Combined form such as "text+image->text": only the input side (before "->") counts.
     const modality: unknown = architecture.modality;
     return typeof modality === 'string' && modality.split('->')[0].includes('image');
   };
   try {
     const modelsResponse = await openai.models.list();
     if (!modelsResponse || !modelsResponse.data || modelsResponse.data.length === 0) {
       return fallbackModel;
     }
     const freeVisionModels = modelsResponse.data
       .filter(model => model.id.includes('free') && acceptsImageInput(model))
       .map(model => {
         const extras = model as any;
         let contextLength = 0;
         if (typeof extras.context_length === 'number') {
           contextLength = extras.context_length;
         } else if (extras.context_window) {
           contextLength = parseInt(extras.context_window, 10);
         }
         // "|| 0" also normalises a NaN produced by parseInt.
         return { id: model.id, contextLength: contextLength || 0 };
       });
     if (freeVisionModels.length === 0) {
       console.error(`No free vision-capable model found; falling back to ${fallbackModel}`);
       return fallbackModel;
     }
     // Strict ">" keeps the earliest entry when several share the largest context.
     const best = freeVisionModels.reduce((top, candidate) =>
       candidate.contextLength > top.contextLength ? candidate : top
     );
     console.error(`Selected free model: ${best.id} with context length: ${best.contextLength}`);
     return best.id;
   } catch (error) {
     console.error('Error finding suitable free model:', error);
     return fallbackModel;
   }
 }
 export async function handleMultiImageAnalysis(
  request: { params: { arguments: MultiImageAnalysisToolRequest } },
  openai: OpenAI,
@@ -128,8 +175,18 @@ export async function handleMultiImageAnalysis(
      throw new Error('Failed to process any of the provided images');
    }
-    // Select model
+    // Select model with priority:
-    const model = args.model || defaultModel || 'anthropic/claude-3.5-sonnet';
+    // 1. User-specified model
    // 2. Default model from environment
    // 3. Free model with vision capabilities (selected automatically)
    let model = args.model || defaultModel;
    if (!model) {
      model = await findSuitableFreeModel(openai);
      console.error(`Using auto-selected model: ${model}`);
    }
    console.error(`Making API call with model: ${model}`);
    // Make the API call
    const completion = await openai.chat.completions.create({