Enhance MCP server (v1.2.0): Consolidate image analysis, add auto model selection
This commit is contained in:
23
README.md
23
README.md
@@ -192,22 +192,6 @@ use_mcp_tool({
|
||||
});
|
||||
```
|
||||
|
||||
### analyze_image
|
||||
|
||||
Analyze a single image with an optional question:
|
||||
|
||||
```javascript
|
||||
use_mcp_tool({
|
||||
server_name: "openrouter",
|
||||
tool_name: "analyze_image",
|
||||
arguments: {
|
||||
image_path: "/absolute/path/to/image.jpg",
|
||||
question: "What objects are in this image?", // Optional
|
||||
model: "anthropic/claude-3.5-sonnet" // Optional if default is set
|
||||
}
|
||||
});
|
||||
```
|
||||
|
||||
### multi_image_analysis
|
||||
|
||||
Analyze multiple images with a single prompt:
|
||||
@@ -329,3 +313,10 @@ npm run build
|
||||
## License
|
||||
|
||||
MIT License
|
||||
|
||||
## Version 1.2.0 Updates
|
||||
- Simplified image analysis by consolidating all functionality into the `multi_image_analysis` tool
|
||||
- Added automatic selection of the free model with the largest context window when no model is specified and no default model is configured
|
||||
- Improved handling of various image sources (file://, http(s)://, and data: URLs)
|
||||
- Enhanced error handling and logging for better troubleshooting
|
||||
- Removed the `analyze_image` tool to eliminate confusion and streamline the interface
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@stabgan/openrouter-mcp-multimodal",
|
||||
"version": "1.1.0",
|
||||
"version": "1.2.0",
|
||||
"description": "MCP server for OpenRouter providing text chat and image analysis tools",
|
||||
"type": "module",
|
||||
"main": "dist/index.js",
|
||||
|
||||
@@ -15,7 +15,6 @@ import { handleChatCompletion, ChatCompletionToolRequest } from './tool-handlers
|
||||
import { handleSearchModels, SearchModelsToolRequest } from './tool-handlers/search-models.js';
|
||||
import { handleGetModelInfo, GetModelInfoToolRequest } from './tool-handlers/get-model-info.js';
|
||||
import { handleValidateModel, ValidateModelToolRequest } from './tool-handlers/validate-model.js';
|
||||
import { handleAnalyzeImage, AnalyzeImageToolRequest } from './tool-handlers/analyze-image.js';
|
||||
import { handleMultiImageAnalysis, MultiImageAnalysisToolRequest } from './tool-handlers/multi-image-analysis.js';
|
||||
|
||||
export class ToolHandlers {
|
||||
@@ -128,41 +127,10 @@ export class ToolHandlers {
|
||||
maxContextTokens: 200000
|
||||
},
|
||||
|
||||
// Image Analysis Tool
|
||||
{
|
||||
name: 'analyze_image',
|
||||
description: 'Analyze an image using OpenRouter vision models',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
image_path: {
|
||||
type: 'string',
|
||||
description: 'Path to the image file to analyze (must be an absolute path)',
|
||||
},
|
||||
image_url: {
|
||||
type: 'string',
|
||||
description: 'URL or data URL of the image (can be a file:// URL, http(s):// URL, or data: URI)',
|
||||
},
|
||||
question: {
|
||||
type: 'string',
|
||||
description: 'Question to ask about the image',
|
||||
},
|
||||
model: {
|
||||
type: 'string',
|
||||
description: 'OpenRouter model to use (e.g., "anthropic/claude-3.5-sonnet")',
|
||||
},
|
||||
},
|
||||
oneOf: [
|
||||
{ required: ['image_path'] },
|
||||
{ required: ['image_url'] }
|
||||
]
|
||||
},
|
||||
},
|
||||
|
||||
// Multi-Image Analysis Tool
|
||||
{
|
||||
name: 'multi_image_analysis',
|
||||
description: 'Analyze multiple images at once with a single prompt and receive detailed responses',
|
||||
description: 'Analyze one or more images with a prompt and receive detailed responses',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
@@ -174,7 +142,7 @@ export class ToolHandlers {
|
||||
properties: {
|
||||
url: {
|
||||
type: 'string',
|
||||
description: 'URL or data URL of the image (can be a file:// URL to read from local filesystem)',
|
||||
description: 'URL or data URL of the image (use file:// URL prefix for local files, http(s):// for web images, or data: for base64 encoded images)',
|
||||
},
|
||||
alt: {
|
||||
type: 'string',
|
||||
@@ -195,7 +163,7 @@ export class ToolHandlers {
|
||||
},
|
||||
model: {
|
||||
type: 'string',
|
||||
description: 'OpenRouter model to use (defaults to claude-3.5-sonnet if not specified)',
|
||||
description: 'OpenRouter model to use. If not specified, the system will use a free model with vision capabilities or the default model.',
|
||||
},
|
||||
},
|
||||
required: ['images', 'prompt'],
|
||||
@@ -308,13 +276,6 @@ export class ToolHandlers {
|
||||
}
|
||||
}, this.openai, this.defaultModel);
|
||||
|
||||
case 'analyze_image':
|
||||
return handleAnalyzeImage({
|
||||
params: {
|
||||
arguments: request.params.arguments as unknown as AnalyzeImageToolRequest
|
||||
}
|
||||
}, this.openai, this.defaultModel);
|
||||
|
||||
case 'multi_image_analysis':
|
||||
return handleMultiImageAnalysis({
|
||||
params: {
|
||||
|
||||
@@ -66,6 +66,53 @@ function truncateMessagesToFit(
|
||||
return truncated;
|
||||
}
|
||||
|
||||
/**
 * Picks the free model with the largest advertised context window.
 *
 * Lists the models available through the client, keeps those whose id
 * contains `free`, and returns the id with the largest context length.
 * When several models tie, the one listed first wins.
 *
 * @param openai - Client used to list the available models.
 * @returns The selected model id, or a known free model id when the listing
 *   is empty, contains no free models, or cannot be fetched. Never rejects.
 */
async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
  // Known free model returned whenever the catalogue cannot be used.
  const fallbackModel = 'deepseek/deepseek-chat-v3-0324:free';

  // Context size is not part of the SDK's model type, so the extra fields are
  // read through a narrow structural type instead of `any`. Both numbers and
  // numeric strings are accepted; anything unusable counts as 0.
  const readContextLength = (model: unknown): number => {
    const extra = model as { context_length?: unknown; context_window?: unknown };
    for (const raw of [extra.context_length, extra.context_window]) {
      let parsed = NaN;
      if (typeof raw === 'number') {
        parsed = raw;
      } else if (typeof raw === 'string') {
        parsed = parseInt(raw, 10);
      }
      if (Number.isFinite(parsed) && parsed > 0) {
        return parsed;
      }
    }
    return 0;
  };

  try {
    const modelsResponse = await openai.models.list();
    const catalogue =
      modelsResponse && Array.isArray(modelsResponse.data) ? modelsResponse.data : [];

    let bestId: string | undefined;
    let bestContext = -1;

    for (const model of catalogue) {
      // Skip malformed entries rather than letting one bad record abort the
      // whole selection.
      if (!model || typeof model.id !== 'string' || !model.id.includes('free')) {
        continue;
      }

      const contextLength = readContextLength(model);
      // Strict comparison keeps the first model on ties.
      if (contextLength > bestContext) {
        bestId = model.id;
        bestContext = contextLength;
      }
    }

    if (bestId === undefined) {
      return fallbackModel; // No free models in the catalogue
    }

    console.error(`Selected free model: ${bestId} with context length: ${bestContext}`);
    return bestId;
  } catch (error) {
    console.error('Error finding suitable free model:', error);
    return fallbackModel;
  }
}
|
||||
|
||||
export async function handleChatCompletion(
|
||||
request: { params: { arguments: ChatCompletionToolRequest } },
|
||||
openai: OpenAI,
|
||||
@@ -73,20 +120,6 @@ export async function handleChatCompletion(
|
||||
) {
|
||||
const args = request.params.arguments;
|
||||
|
||||
// Validate model selection
|
||||
const model = args.model || defaultModel;
|
||||
if (!model) {
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: 'text',
|
||||
text: 'No model specified and no default model configured in MCP settings. Please specify a model or set OPENROUTER_DEFAULT_MODEL in the MCP configuration.',
|
||||
},
|
||||
],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
|
||||
// Validate message array
|
||||
if (args.messages.length === 0) {
|
||||
return {
|
||||
@@ -101,8 +134,21 @@ export async function handleChatCompletion(
|
||||
}
|
||||
|
||||
try {
|
||||
// Select model with priority:
|
||||
// 1. User-specified model
|
||||
// 2. Default model from environment
|
||||
// 3. Free model with the largest context window (selected automatically)
|
||||
let model = args.model || defaultModel;
|
||||
|
||||
if (!model) {
|
||||
model = await findSuitableFreeModel(openai);
|
||||
console.error(`Using auto-selected model: ${model}`);
|
||||
}
|
||||
|
||||
// Truncate messages to fit within context window
|
||||
const truncatedMessages = truncateMessagesToFit(args.messages, MAX_CONTEXT_TOKENS);
|
||||
|
||||
console.error(`Making API call with model: ${model}`);
|
||||
|
||||
const completion = await openai.chat.completions.create({
|
||||
model,
|
||||
|
||||
@@ -80,6 +80,53 @@ async function processImage(buffer: Buffer): Promise<string> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Picks a free model that accepts image input, preferring the largest
 * advertised context window.
 *
 * Lists the models available through the client, keeps those whose id
 * contains `free` and whose metadata advertises image input, and returns the
 * id with the largest context length. When several models tie, the one
 * listed first wins.
 *
 * NOTE(review): image support is detected from `architecture.input_modalities`
 * (array containing "image") or the input side of `architecture.modality`
 * (e.g. "text+image->text"). This assumes the OpenRouter models listing
 * exposes those fields — confirm against the current API.
 *
 * @param openai - Client used to list the available models.
 * @returns The selected model id, or a known free vision model id when the
 *   listing is empty, has no free model advertising image input, or cannot be
 *   fetched. Never rejects.
 */
async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
  // Known free vision model returned whenever the catalogue cannot be used.
  const fallbackModel = 'qwen/qwen2.5-vl-32b-instruct:free';

  // True when the model's metadata says it accepts images as input. Without
  // this check a text-only free model with a large context window could be
  // auto-selected for an image-analysis request.
  const acceptsImages = (model: unknown): boolean => {
    const architecture = (model as {
      architecture?: { input_modalities?: unknown; modality?: unknown } | null;
    }).architecture;
    if (!architecture) {
      return false;
    }
    if (Array.isArray(architecture.input_modalities)) {
      if (architecture.input_modalities.includes('image')) {
        return true;
      }
    }
    if (typeof architecture.modality === 'string') {
      // Only the part before "->" describes the inputs.
      const [inputs = ''] = architecture.modality.split('->');
      return inputs.includes('image');
    }
    return false;
  };

  // Context size is not part of the SDK's model type, so the extra fields are
  // read through a narrow structural type instead of `any`. Both numbers and
  // numeric strings are accepted; anything unusable counts as 0.
  const readContextLength = (model: unknown): number => {
    const extra = model as { context_length?: unknown; context_window?: unknown };
    for (const raw of [extra.context_length, extra.context_window]) {
      let parsed = NaN;
      if (typeof raw === 'number') {
        parsed = raw;
      } else if (typeof raw === 'string') {
        parsed = parseInt(raw, 10);
      }
      if (Number.isFinite(parsed) && parsed > 0) {
        return parsed;
      }
    }
    return 0;
  };

  try {
    const modelsResponse = await openai.models.list();
    const catalogue =
      modelsResponse && Array.isArray(modelsResponse.data) ? modelsResponse.data : [];

    let bestId: string | undefined;
    let bestContext = -1;

    for (const model of catalogue) {
      // Skip malformed entries rather than letting one bad record abort the
      // whole selection.
      if (!model || typeof model.id !== 'string' || !model.id.includes('free')) {
        continue;
      }
      if (!acceptsImages(model)) {
        continue;
      }

      const contextLength = readContextLength(model);
      // Strict comparison keeps the first model on ties.
      if (contextLength > bestContext) {
        bestId = model.id;
        bestContext = contextLength;
      }
    }

    if (bestId === undefined) {
      return fallbackModel; // No free vision-capable model in the catalogue
    }

    console.error(`Selected free model: ${bestId} with context length: ${bestContext}`);
    return bestId;
  } catch (error) {
    console.error('Error finding suitable free model:', error);
    return fallbackModel;
  }
}
|
||||
|
||||
export async function handleMultiImageAnalysis(
|
||||
request: { params: { arguments: MultiImageAnalysisToolRequest } },
|
||||
openai: OpenAI,
|
||||
@@ -128,8 +175,18 @@ export async function handleMultiImageAnalysis(
|
||||
throw new Error('Failed to process any of the provided images');
|
||||
}
|
||||
|
||||
// Select model
|
||||
const model = args.model || defaultModel || 'anthropic/claude-3.5-sonnet';
|
||||
// Select model with priority:
|
||||
// 1. User-specified model
|
||||
// 2. Default model from environment
|
||||
// 3. Free model with vision capabilities (selected automatically)
|
||||
let model = args.model || defaultModel;
|
||||
|
||||
if (!model) {
|
||||
model = await findSuitableFreeModel(openai);
|
||||
console.error(`Using auto-selected model: ${model}`);
|
||||
}
|
||||
|
||||
console.error(`Making API call with model: ${model}`);
|
||||
|
||||
// Make the API call
|
||||
const completion = await openai.chat.completions.create({
|
||||
|
||||
Reference in New Issue
Block a user