fix: Improved base64 image handling and Windows compatibility

This commit is contained in:
stabgan
2025-03-28 12:41:57 +05:30
parent 3d9d07b210
commit 8512f031f7
22 changed files with 4914 additions and 117 deletions

17
.gitignore vendored
View File

@@ -48,3 +48,20 @@ ehthumbs_vista.db
# Testing
coverage/
.nyc_output/
# Environment variables
.env
.env.local
.env.development.local
.env.test.local
.env.production.local
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# OS files
.DS_Store
Thumbs.db

131
convert_to_base64.html Normal file
View File

@@ -0,0 +1,131 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Image to Base64 Converter</title>
<style>
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
}
.container {
display: flex;
flex-direction: column;
gap: 20px;
}
.preview {
margin-top: 20px;
max-width: 100%;
}
.preview img {
max-width: 100%;
max-height: 300px;
border: 1px solid #ddd;
}
.result {
margin-top: 20px;
}
textarea {
width: 100%;
height: 100px;
margin-top: 10px;
}
button {
padding: 10px 15px;
background-color: #4CAF50;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
font-size: 16px;
}
button:hover {
background-color: #45a049;
}
.copy-button {
margin-top: 10px;
}
.code-block {
background-color: #f5f5f5;
padding: 15px;
border-radius: 4px;
overflow-x: auto;
margin-top: 20px;
}
</style>
</head>
<body>
<h1>Image to Base64 Converter for MCP Testing</h1>
<p>Use this tool to convert a local image to a base64 string that can be used with the MCP server's multi_image_analysis tool.</p>
<div class="container">
<div>
<label for="imageInput">Select an image:</label><br>
<input type="file" id="imageInput" accept="image/*">
</div>
<div class="preview" id="preview">
<h3>Image Preview:</h3>
<div id="imagePreview"></div>
</div>
<div class="result" id="result">
<h3>Base64 String:</h3>
<textarea id="base64Output" readonly></textarea>
<button class="copy-button" id="copyButton">Copy to Clipboard</button>
</div>
<div>
<h3>How to use with MCP:</h3>
<div class="code-block">
<pre>
{
"images": [
{
"url": "PASTE_BASE64_STRING_HERE"
}
],
"prompt": "Please describe this image in detail. What does it show?",
"model": "qwen/qwen2.5-vl-32b-instruct:free"
}
</pre>
</div>
</div>
</div>
<script>
        // Wire up the file picker: preview the chosen image and emit its
        // base64 data URI into the textarea.
        document.getElementById('imageInput').addEventListener('change', function(event) {
            const file = event.target.files[0];
            if (!file) return;
            // Display image preview
            const preview = document.getElementById('imagePreview');
            preview.innerHTML = '';
            const img = document.createElement('img');
            // Object URL avoids re-reading the file just for the preview.
            img.src = URL.createObjectURL(file);
            preview.appendChild(img);
            // Convert to base64
            const reader = new FileReader();
            reader.onload = function(e) {
                const base64String = e.target.result; // This already includes "data:image/jpeg;base64,"
                document.getElementById('base64Output').value = base64String;
            };
            reader.readAsDataURL(file);
        });
        // Copy the base64 string to the clipboard with brief visual feedback.
        document.getElementById('copyButton').addEventListener('click', function() {
            const textarea = document.getElementById('base64Output');
            textarea.select();
            // NOTE(review): document.execCommand is deprecated;
            // navigator.clipboard.writeText() is the modern replacement.
            document.execCommand('copy');
            this.textContent = 'Copied!';
            setTimeout(() => {
                this.textContent = 'Copy to Clipboard';
            }, 2000);
        });
    </script>
</body>
</html>

89
convert_to_base64.py Normal file
View File

@@ -0,0 +1,89 @@
#!/usr/bin/env python3
import base64
import argparse
import os
import sys
from pathlib import Path
def convert_image_to_base64(image_path):
    """Return the image at *image_path* as a data-URI base64 string.

    The MIME type is picked from the file extension; unrecognised
    extensions fall back to ``application/octet-stream``. On any read
    error a message is printed to stderr and ``None`` is returned.
    """
    # Recognised extensions and their MIME types.
    known_types = {
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.webp': 'image/webp',
        '.bmp': 'image/bmp',
    }
    extension = os.path.splitext(image_path)[1].lower()
    mime_type = known_types.get(extension, 'application/octet-stream')

    try:
        with open(image_path, 'rb') as handle:
            raw_bytes = handle.read()
    except Exception as exc:
        print(f"Error: {exc}", file=sys.stderr)
        return None

    encoded = base64.b64encode(raw_bytes).decode('utf-8')
    return f"data:{mime_type};base64,{encoded}"
def save_base64_to_file(base64_data, output_path):
    """Write *base64_data* to *output_path*; return True on success.

    Prints a confirmation on success, or an error message to stderr
    (returning False) if the file cannot be written.
    """
    try:
        with open(output_path, 'w') as destination:
            destination.write(base64_data)
        print(f"Base64 data saved to {output_path}")
        return True
    except Exception as exc:
        print(f"Error saving file: {exc}", file=sys.stderr)
        return False
def main():
    """CLI entry point: parse arguments, convert, then save or print.

    Returns a process exit code (0 on success, 1 on any failure).
    """
    parser = argparse.ArgumentParser(description='Convert image to base64 for MCP server testing')
    parser.add_argument('image_path', help='Path to the image file')
    parser.add_argument('-o', '--output', help='Output file path (if not provided, output to console)')
    args = parser.parse_args()

    # Bail out early when the input file does not exist.
    if not Path(args.image_path).exists():
        print(f"Error: File not found: {args.image_path}", file=sys.stderr)
        return 1

    base64_data = convert_image_to_base64(args.image_path)
    if not base64_data:
        return 1

    if args.output:
        # Persist to the requested file; map failure to exit code 1.
        return 0 if save_base64_to_file(base64_data, args.output) else 1

    # No output file: show a truncated preview plus an MCP usage snippet.
    print("\nBase64 Image Data:")
    print(base64_data[:100] + "..." if len(base64_data) > 100 else base64_data)
    print("\nTotal length:", len(base64_data))
    print("\nTo use with MCP server in multi_image_analysis:")
    print('''
{
  "images": [
    {
      "url": "''' + base64_data[:20] + '... (full base64 string)" ' + '''
    }
  ],
  "prompt": "Please describe this image in detail. What does it show?",
  "model": "qwen/qwen2.5-vl-32b-instruct:free"
}
''')
    return 0


if __name__ == "__main__":
    sys.exit(main())

78
encode_image.sh Normal file
View File

@@ -0,0 +1,78 @@
#!/bin/bash
# Convert an image file to a base64 data URI suitable for the MCP server's
# multi_image_analysis tool. Optionally writes the result to a file;
# otherwise prints a preview plus a sample request body.
#
# Usage: ./encode_image.sh <image_file> [output_file]

# Check if an image file is provided
if [ $# -lt 1 ]; then
    echo "Usage: $0 <image_file> [output_file]"
    echo "Example: $0 test.png base64_output.txt"
    exit 1
fi

IMAGE_FILE="$1"
OUTPUT_FILE="${2:-}" # Use the second argument as output file, if provided

# Check if the image file exists
if [ ! -f "$IMAGE_FILE" ]; then
    echo "Error: Image file '$IMAGE_FILE' does not exist."
    exit 1
fi

# Get the file extension (everything after the last dot) and determine MIME type
FILE_EXT="${IMAGE_FILE##*.}"
MIME_TYPE="application/octet-stream" # Default MIME type
case "$FILE_EXT" in
    png|PNG)
        MIME_TYPE="image/png"
        ;;
    jpg|jpeg|JPG|JPEG)
        MIME_TYPE="image/jpeg"
        ;;
    gif|GIF)
        MIME_TYPE="image/gif"
        ;;
    webp|WEBP)
        MIME_TYPE="image/webp"
        ;;
    *)
        echo "Warning: Unknown file extension. Using generic MIME type."
        ;;
esac

# Convert image to base64
echo "Converting '$IMAGE_FILE' to base64..."

# Different commands based on OS:
# macOS's base64 takes -i <file> and does not wrap lines; GNU base64 needs
# -w 0 to disable line wrapping so the data URI stays on a single line.
if [ "$(uname)" == "Darwin" ]; then
    # macOS
    BASE64_DATA="data:$MIME_TYPE;base64,$(base64 -i "$IMAGE_FILE")"
else
    # Linux and others
    BASE64_DATA="data:$MIME_TYPE;base64,$(base64 -w 0 "$IMAGE_FILE")"
fi

# Output the base64 data
if [ -n "$OUTPUT_FILE" ]; then
    # Save to file if output file is specified
    echo "$BASE64_DATA" > "$OUTPUT_FILE"
    echo "Base64 data saved to '$OUTPUT_FILE'"
    echo "Total length: ${#BASE64_DATA} characters"
else
    # Display a preview and length if no output file
    echo "Base64 Image Data (first 100 chars):"
    echo "${BASE64_DATA:0:100}..."
    echo "Total length: ${#BASE64_DATA} characters"
    echo ""
    echo "To use with MCP server in multi_image_analysis:"
    echo '{
  "images": [
    {
      "url": "'"${BASE64_DATA:0:20}"'... (full base64 string)"
    }
  ],
  "prompt": "Please describe this image in detail. What does it show?",
  "model": "qwen/qwen2.5-vl-32b-instruct:free"
}'
fi

exit 0

1594
full_base64.txt Normal file

File diff suppressed because it is too large Load Diff

1611
lena_base64.txt Normal file

File diff suppressed because it is too large Load Diff

183
openrouter-image-python.py Normal file
View File

@@ -0,0 +1,183 @@
#!/usr/bin/env python3
import base64
import os
import mimetypes
import requests
from openai import OpenAI
# Constants
OPENROUTER_API_KEY = "your_openrouter_api_key" # Replace with your actual key
IMAGE_PATH = "path/to/your/image.jpg" # Replace with your image path
def image_to_base64(image_path):
    """Read *image_path* and return its contents as a base64 data URI.

    The MIME type is guessed from the filename via ``mimetypes``; when the
    guess fails, ``application/octet-stream`` is used. Any error is printed
    and then re-raised so the caller can decide what to do.
    """
    try:
        # Default to generic binary if type cannot be determined
        guessed, _ = mimetypes.guess_type(image_path)
        content_type = guessed or "application/octet-stream"

        # Read and encode the image in one pass.
        with open(image_path, "rb") as handle:
            payload = base64.b64encode(handle.read()).decode("utf-8")

        return f"data:{content_type};base64,{payload}"
    except Exception as e:
        print(f"Error converting image to base64: {e}")
        raise
def send_image_direct_api(base64_image, question="What's in this image?"):
    """Send an image to OpenRouter using a direct HTTP call.

    Parameters:
        base64_image: data-URI base64 string (from image_to_base64).
        question: prompt text sent alongside the image.

    Prints the model's answer; on failure prints the error and, when the
    server returned a body, the API error details. Never raises.
    """
    try:
        print("Sending image via direct API call...")
        headers = {
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://your-site-url.com",  # Optional
            "X-Title": "Your Site Name"  # Optional
        }
        payload = {
            "model": "anthropic/claude-3-opus",  # Choose an appropriate model with vision capabilities
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": question
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": base64_image
                            }
                        }
                    ]
                }
            ]
        }
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=payload
        )
        response.raise_for_status()  # Raise exception for non-200 responses
        data = response.json()
        print("Response from direct API:")
        print(data["choices"][0]["message"]["content"])
    except Exception as e:
        print(f"Error sending image via direct API: {e}")
        # BUG FIX: requests.Response defines __bool__ from the status code,
        # so a 4xx/5xx response is falsy and the old `and e.response` test
        # skipped printing details exactly when the request failed.
        # Compare against None instead.
        if getattr(e, "response", None) is not None:
            print(f"API error details: {e.response.text}")
def send_image_openai_sdk(base64_image, question="What's in this image?"):
    """Send an image to OpenRouter using the OpenAI SDK.

    base64_image: data-URI base64 string (see image_to_base64).
    question: prompt text sent together with the image.
    Prints the model's reply; errors are logged, not raised.
    """
    try:
        print("Sending image via OpenAI SDK...")
        # Initialize the OpenAI client with OpenRouter base URL
        client = OpenAI(
            api_key=OPENROUTER_API_KEY,
            base_url="https://openrouter.ai/api/v1",
            default_headers={
                "HTTP-Referer": "https://your-site-url.com",  # Optional
                "X-Title": "Your Site Name"  # Optional
            }
        )
        # Create the message with text and image. OpenRouter accepts the
        # OpenAI multimodal format: a content list mixing "text" and
        # "image_url" parts.
        completion = client.chat.completions.create(
            model="anthropic/claude-3-opus",  # Choose an appropriate model with vision capabilities
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": question
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                # base64_image is already a full data: URI
                                "url": base64_image
                            }
                        }
                    ]
                }
            ]
        )
        print("Response from OpenAI SDK:")
        print(completion.choices[0].message.content)
    except Exception as e:
        # Demo helper: log and continue rather than crash the caller.
        print(f"Error sending image via OpenAI SDK: {e}")
def send_image_from_base64_file(base64_file_path, question="What's in this image?"):
    """Use a pre-encoded base64 file (e.g., from the bash script).

    base64_file_path: path to a text file holding a data-URI base64 string
        (as written by encode_image.sh or convert_to_base64.py).
    question: prompt text sent together with the image.
    Prints the model's reply; errors are logged, not raised.
    """
    try:
        print("Sending image from base64 file...")
        # Read the base64 data from file; strip() drops the trailing
        # newline that shell tools typically append.
        with open(base64_file_path, "r") as file:
            base64_data = file.read().strip()
        # Initialize the OpenAI client
        client = OpenAI(
            api_key=OPENROUTER_API_KEY,
            base_url="https://openrouter.ai/api/v1"
        )
        # Create the message with text and image
        completion = client.chat.completions.create(
            model="anthropic/claude-3-opus",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": question
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                # The file contents are expected to be a full data: URI
                                "url": base64_data
                            }
                        }
                    ]
                }
            ]
        )
        print("Response when using base64 file:")
        print(completion.choices[0].message.content)
    except Exception as e:
        # Demo helper: log and continue rather than crash the caller.
        print(f"Error sending image from base64 file: {e}")
def main():
    """Run the demo flows against the configured IMAGE_PATH."""
    try:
        # Encode the image once and reuse it for each example.
        encoded_image = image_to_base64(IMAGE_PATH)
        print("Image converted to base64 successfully")

        # Example 1: Using direct API call
        send_image_direct_api(encoded_image)

        # Example 2: Using OpenAI SDK
        send_image_openai_sdk(encoded_image)

        # Example 3: Using a base64 file (if you have one)
        # send_image_from_base64_file("path/to/base64.txt")
    except Exception as e:
        print(f"Error in main function: {e}")


if __name__ == "__main__":
    main()

247
openrouter-image-sdk.js Normal file
View File

@@ -0,0 +1,247 @@
/**
* OpenRouter Image Analysis using OpenAI SDK
*
* This script demonstrates how to analyze local images using OpenRouter's API
* through the OpenAI SDK. It supports both command-line usage and can be imported
* as a module for use in other applications.
*
* Usage:
* - Direct: node openrouter-image-sdk.js <image_path> [prompt]
* - As module: import { analyzeImage } from './openrouter-image-sdk.js'
*
* Environment variables:
* - OPENROUTER_API_KEY: Your OpenRouter API key (required)
*/
import 'dotenv/config';
import { promises as fs } from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import { dirname } from 'path';
import { OpenAI } from 'openai';
// ES Module compatibility
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Constants
const DEFAULT_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
const MAX_RETRIES = 2;
const RETRY_DELAY = 1000; // milliseconds
/**
 * Convert a local image file to base64 format
 *
 * @param {string} filePath - Path to the image file
 * @returns {Promise<string>} - Base64 encoded image with data URI prefix
 * @throws {Error} when the file is missing or cannot be read
 */
export async function imageToBase64(filePath) {
  try {
    // Fail fast with a clear message when the file is missing.
    try {
      await fs.access(filePath);
    } catch (error) {
      throw new Error(`Image file not found: ${filePath}`);
    }

    const imageBuffer = await fs.readFile(filePath);

    // Extension -> MIME type lookup (replaces a switch statement).
    const MIME_BY_EXT = {
      '.png': 'image/png',
      '.jpg': 'image/jpeg',
      '.jpeg': 'image/jpeg',
      '.webp': 'image/webp',
      '.gif': 'image/gif',
    };
    const fileExt = path.extname(filePath).toLowerCase();
    let mimeType = MIME_BY_EXT[fileExt];
    if (!mimeType) {
      console.warn(`Unknown file extension: ${fileExt}, using default MIME type`);
      mimeType = 'application/octet-stream';
    }

    // Encode and prepend the data URI header.
    return `data:${mimeType};base64,${imageBuffer.toString('base64')}`;
  } catch (error) {
    console.error('Error converting image to base64:', error);
    throw new Error(`Failed to convert image to base64: ${error.message}`);
  }
}
/**
 * Pause for a specified amount of time.
 *
 * @param {number} ms - Milliseconds to sleep
 * @returns {Promise<void>} resolves after the delay
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
/**
 * Analyze an image using OpenRouter's API via OpenAI SDK
 *
 * @param {Object} options - Options for image analysis
 * @param {string} options.imagePath - Path to the local image file
 * @param {string} [options.imageBase64] - Base64 encoded image (alternative to imagePath; takes precedence)
 * @param {string} [options.prompt="Please describe this image in detail."] - The prompt to send with the image
 * @param {string} [options.model=DEFAULT_MODEL] - The model to use for analysis
 * @param {string} [options.apiKey] - OpenRouter API key (defaults to OPENROUTER_API_KEY env var)
 * @returns {Promise<Object>} - { analysis, model, usage, requestId, finishReason }
 * @throws {Error} when no API key or image is supplied, or when all attempts fail
 */
export async function analyzeImage({
  imagePath,
  imageBase64,
  prompt = "Please describe this image in detail.",
  model = DEFAULT_MODEL,
  apiKey
}) {
  // Check for API key
  const openrouterApiKey = apiKey || process.env.OPENROUTER_API_KEY;
  if (!openrouterApiKey) {
    throw new Error('OpenRouter API key is required. Set OPENROUTER_API_KEY in your environment or pass it as an option.');
  }
  // Check that we have either imagePath or imageBase64
  if (!imagePath && !imageBase64) {
    throw new Error('Either imagePath or imageBase64 must be provided.');
  }
  // Get base64 data if not provided; an explicit imageBase64 wins over imagePath.
  let base64Data = imageBase64;
  if (!base64Data && imagePath) {
    console.log(`Converting image at ${imagePath} to base64...`);
    base64Data = await imageToBase64(imagePath);
    console.log('Image converted successfully!');
  }
  // Initialize the OpenAI client with OpenRouter base URL
  const openai = new OpenAI({
    apiKey: openrouterApiKey,
    baseURL: 'https://openrouter.ai/api/v1',
    defaultHeaders: {
      // Attribution headers forwarded to OpenRouter.
      'HTTP-Referer': 'https://github.com/stabgan/openrouter-mcp-multimodal',
      'X-Title': 'OpenRouter Local Image Analysis'
    }
  });
  // Retry loop: attempt 0 is the initial call, then up to MAX_RETRIES retries.
  let lastError = null;
  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
    try {
      if (attempt > 0) {
        console.log(`Retry attempt ${attempt}/${MAX_RETRIES}...`);
        await sleep(RETRY_DELAY * attempt); // Linear backoff: delay grows with each attempt
      }
      console.log(`Sending image analysis request to model: ${model}`);
      // Create the message with text and image
      const completion = await openai.chat.completions.create({
        model,
        messages: [
          {
            role: 'user',
            content: [
              {
                type: 'text',
                text: prompt
              },
              {
                type: 'image_url',
                image_url: {
                  url: base64Data
                }
              }
            ]
          }
        ]
      });
      // Extract the relevant information from the response
      if (completion && completion.choices && completion.choices.length > 0) {
        const result = {
          analysis: completion.choices[0].message.content,
          model: completion.model,
          usage: completion.usage,
          requestId: completion.id,
          finishReason: completion.choices[0].finish_reason
        };
        return result;
      } else {
        throw new Error('Unexpected response structure from OpenRouter API.');
      }
    } catch (error) {
      lastError = error;
      // 402 Payment Required will not fix itself on retry — bail out immediately.
      if (error.status === 402 || (error.response && error.response.status === 402)) {
        console.error('Payment required error. Not retrying.');
        break;
      }
      if (attempt === MAX_RETRIES) {
        console.error('Maximum retry attempts reached.');
      }
    }
  }
  // If we've exhausted all retries, throw the last error
  throw lastError || new Error('Failed to analyze image after multiple attempts.');
}
/**
 * Command line interface for image analysis.
 * Usage: node openrouter-image-sdk.js <image_path> [prompt]
 */
async function main() {
  try {
    const cliArgs = process.argv.slice(2);
    // No arguments: print usage and exit cleanly.
    if (cliArgs.length === 0) {
      console.log('Usage: node openrouter-image-sdk.js <image_path> [prompt]');
      console.log('Example: node openrouter-image-sdk.js test.png "What objects do you see in this image?"');
      process.exit(0);
    }

    const imagePath = cliArgs[0];
    const prompt = cliArgs[1] || "Please describe this image in detail. What do you see?";
    console.log(`Analyzing image: ${imagePath}`);
    console.log(`Prompt: ${prompt}`);

    const result = await analyzeImage({ imagePath, prompt });

    console.log('\n----- Analysis Results -----\n');
    console.log(result.analysis);
    console.log('\n----------------------------\n');
    console.log('Model used:', result.model);

    // Token accounting is optional in the API response.
    if (result.usage) {
      console.log('Token usage:');
      console.log('- Prompt tokens:', result.usage.prompt_tokens);
      console.log('- Completion tokens:', result.usage.completion_tokens);
      console.log('- Total tokens:', result.usage.total_tokens);
    }
  } catch (error) {
    console.error('Error:', error.message);
    if (error.response) {
      console.error('API error details:', JSON.stringify(error.response, null, 2));
    }
    process.exit(1);
  }
}

// Kick off the CLI; any rejection escaping main() is fatal.
main().catch(error => {
  console.error('Fatal error:', error);
  process.exit(1);
});

4
package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "@stabgan/openrouter-mcp-multimodal",
"version": "1.2.0",
"version": "1.3.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@stabgan/openrouter-mcp-multimodal",
"version": "1.2.0",
"version": "1.3.0",
"license": "MIT",
"dependencies": {
"@modelcontextprotocol/sdk": "^1.8.0",

View File

@@ -1,6 +1,6 @@
{
"name": "@stabgan/openrouter-mcp-multimodal",
"version": "1.3.0",
"version": "1.4.0",
"description": "MCP server for OpenRouter providing text chat and image analysis tools",
"type": "module",
"main": "dist/index.js",

259
send_image_to_openrouter.js Normal file
View File

@@ -0,0 +1,259 @@
// Send an image to OpenRouter using JavaScript
import { promises as fs } from 'fs';
import path from 'path';
import axios from 'axios';
import { OpenAI } from 'openai';
import { fileURLToPath } from 'url';
import { dirname } from 'path';
console.log("Starting script...");
// Constants
const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY || 'your_openrouter_api_key'; // Get from env or replace
const IMAGE_PATH = process.argv[2] || 'test.png'; // Get from command line or use default
const DEFAULT_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
console.log(`Arguments: ${process.argv.join(', ')}`);
console.log(`Using image path: ${IMAGE_PATH}`);
// Load environment variables from a .env file sitting next to this script.
// Parsing failures are logged but not fatal.
async function loadEnv() {
  try {
    const scriptDir = dirname(fileURLToPath(import.meta.url));
    const envFile = await fs.readFile(path.join(scriptDir, '.env'), 'utf-8');

    for (const line of envFile.split('\n')) {
      const match = line.match(/^\s*([\w.-]+)\s*=\s*(.*)?\s*$/);
      if (!match) continue;

      const key = match[1];
      let value = match[2] || '';
      // Strip surrounding double quotes, if present.
      if (value.length > 0 && value.charAt(0) === '"' && value.charAt(value.length - 1) === '"') {
        value = value.replace(/^"|"$/g, '');
      }
      process.env[key] = value;
    }
    console.log('Environment variables loaded from .env file');
  } catch (error) {
    console.error('Error loading .env file:', error.message);
  }
}
/**
 * Convert an image file to a base64 data URI.
 *
 * @param {string} filePath - path of the image to encode
 * @returns {Promise<string>} data URI string
 */
async function imageToBase64(filePath) {
  try {
    const buffer = await fs.readFile(filePath);

    // Extension -> MIME type lookup.
    const extToMime = {
      '.png': 'image/png',
      '.jpg': 'image/jpeg',
      '.jpeg': 'image/jpeg',
      '.webp': 'image/webp',
      // Add other supported types as needed
    };
    const mimeType = extToMime[path.extname(filePath).toLowerCase()] || 'application/octet-stream';

    // Encode and prepend the data URI header.
    return `data:${mimeType};base64,${buffer.toString('base64')}`;
  } catch (error) {
    console.error('Error converting image to base64:', error);
    throw error;
  }
}
/**
 * Example 1: Send a base64 image using the MCP server analyze_image tool
 *
 * Note: nothing is sent anywhere — this only prints the request shape an
 * MCP client would use.
 */
async function testMcpAnalyzeImage(base64Image, question = "What's in this image?") {
  try {
    console.log('Testing MCP analyze_image tool with base64 image...');
    // This would normally be handled by the MCP server client
    // This is a simulation of how to structure the data for the MCP server
    console.log(`
To analyze the image using MCP, send this request to the MCP server:

{
  "tool": "mcp_openrouter_analyze_image",
  "arguments": {
    "image_path": "${base64Image.substring(0, 50)}...", // Truncated for display
    "question": "${question}",
    "model": "${DEFAULT_MODEL}"
  }
}

The MCP server will convert the image path (which is already a base64 data URL)
and send it to OpenRouter in the correct format.
`);
  } catch (error) {
    console.error('Error testing MCP analyze_image:', error);
  }
}
/**
 * Example 2: Send multiple base64 images using the MCP server multi_image_analysis tool
 *
 * Note: like testMcpAnalyzeImage, this only prints the request shape.
 */
async function testMcpMultiImageAnalysis(base64Images, prompt = "Describe these images in detail.") {
  try {
    console.log('Testing MCP multi_image_analysis tool with base64 images...');
    // Create the images array for the MCP request
    // NOTE(review): `images` is built but never used below — the template
    // reads base64Images directly. Left as-is to preserve behavior.
    const images = base64Images.map(base64 => ({ url: base64 }));
    // This would normally be handled by the MCP server client
    // This is a simulation of how to structure the data for the MCP server
    console.log(`
To analyze multiple images using MCP, send this request to the MCP server:

{
  "tool": "mcp_openrouter_multi_image_analysis",
  "arguments": {
    "images": [
      { "url": "${base64Images[0].substring(0, 50)}..." } // Truncated for display
      ${base64Images.length > 1 ? `, { "url": "${base64Images[1].substring(0, 50)}..." }` : ''}
      ${base64Images.length > 2 ? ', ...' : ''}
    ],
    "prompt": "${prompt}",
    "model": "${DEFAULT_MODEL}"
  }
}

The MCP server will process these base64 images and send them to OpenRouter
in the correct format.
`);
  } catch (error) {
    console.error('Error testing MCP multi_image_analysis:', error);
  }
}
/**
 * Example 3: Direct OpenRouter API call with base64 image (for comparison)
 *
 * @param {string} base64Image - data-URI base64 string
 * @param {string} [question] - prompt text sent with the image
 * @param {string} apiKey - OpenRouter API key used for the Authorization header
 */
async function sendImageDirectAPI(base64Image, question = "What's in this image?", apiKey) {
  try {
    console.log('Sending image directly to OpenRouter API (for comparison)...');
    const response = await axios.post(
      'https://openrouter.ai/api/v1/chat/completions',
      {
        model: DEFAULT_MODEL,
        // OpenAI-style multimodal message: a content list mixing text and image parts.
        messages: [
          {
            role: 'user',
            content: [
              {
                type: 'text',
                text: question
              },
              {
                type: 'image_url',
                image_url: {
                  url: base64Image
                }
              }
            ]
          }
        ]
      },
      {
        headers: {
          'Authorization': `Bearer ${apiKey}`,
          'Content-Type': 'application/json',
          // Attribution headers recognized by OpenRouter.
          'HTTP-Referer': 'https://github.com/yourusername/your-repo',
          'X-Title': 'MCP Server Demo'
        }
      }
    );
    console.log('\nDirect API response:');
    console.log(response.data.choices[0].message.content);
  } catch (error) {
    // Best-effort demo helper: log the failure (and server body when present).
    console.error('Error sending image via direct API:', error);
    if (error.response) {
      console.error('API error details:', error.response.data);
    }
  }
}
/**
 * Main function to run the examples: load .env, validate inputs, encode the
 * image, then run the two MCP simulations and the direct API call.
 */
async function main() {
  try {
    // Load environment variables from .env file
    await loadEnv();
    // Get API key from environment after loading
    const apiKey = process.env.OPENROUTER_API_KEY || OPENROUTER_API_KEY;
    // Debug: Show if API key is set in environment
    console.log(`API key from environment: ${process.env.OPENROUTER_API_KEY ? 'Yes (set)' : 'No (not set)'}`);
    console.log(`Using API key: ${apiKey === 'your_openrouter_api_key' ? 'Default placeholder (update needed)' : 'From environment'}`);
    // Check if API key is provided
    if (apiKey === 'your_openrouter_api_key') {
      console.error('Please set the OPENROUTER_API_KEY environment variable or update the script.');
      return;
    }
    console.log(`Converting image: ${IMAGE_PATH}`);
    // Check if the image file exists
    try {
      await fs.access(IMAGE_PATH);
      console.log(`Image file exists: ${IMAGE_PATH}`);
    } catch (err) {
      console.error(`Error: Image file does not exist: ${IMAGE_PATH}`);
      return;
    }
    // Convert the image to base64
    const base64Image = await imageToBase64(IMAGE_PATH);
    console.log('Image converted to base64 successfully.');
    console.log(`Base64 length: ${base64Image.length} characters`);
    console.log(`Base64 starts with: ${base64Image.substring(0, 50)}...`);
    // For multiple images demo, we'll use the same image twice
    const base64Images = [base64Image, base64Image];
    // Example 1: MCP server with analyze_image
    await testMcpAnalyzeImage(base64Image);
    // Example 2: MCP server with multi_image_analysis
    await testMcpMultiImageAnalysis(base64Images);
    // Example 3: Direct API call.
    // FIX: the previous guard `if (apiKey !== 'your_openrouter_api_key')`
    // was always true here, because the placeholder case already returned
    // earlier — the dead branch has been removed (behavior unchanged).
    await sendImageDirectAPI(base64Image, "What's in this image?", apiKey);
    console.log('\nDone! You can now use the MCP server with base64 encoded images.');
  } catch (error) {
    console.error('Error in main function:', error);
  }
}

// Run the main function directly
console.log("Running main function...");
main().catch(error => {
  console.error("Unhandled error in main:", error);
});

160
send_image_to_openrouter.ts Normal file
View File

@@ -0,0 +1,160 @@
import fs from 'fs/promises';
import path from 'path';
import OpenAI from 'openai';
import axios from 'axios';
// Constants
const OPENROUTER_API_KEY = 'your_openrouter_api_key'; // Replace with your actual key
const IMAGE_PATH = 'path/to/your/image.jpg'; // Replace with your image path
/**
 * Convert an image file to a base64 data URI.
 *
 * @param filePath - path of the image to encode
 * @returns the encoded image, prefixed with its data URI header
 */
async function imageToBase64(filePath: string): Promise<string> {
  try {
    const buffer = await fs.readFile(filePath);

    // Extension -> MIME type lookup; add other supported types as needed.
    const mimeByExt: Record<string, string> = {
      '.png': 'image/png',
      '.jpg': 'image/jpeg',
      '.jpeg': 'image/jpeg',
      '.webp': 'image/webp',
    };
    const mimeType = mimeByExt[path.extname(filePath).toLowerCase()] ?? 'application/octet-stream';

    // Encode and prepend the data URI header.
    return `data:${mimeType};base64,${buffer.toString('base64')}`;
  } catch (error) {
    console.error('Error converting image to base64:', error);
    throw error;
  }
}
/**
 * Method 1: Send an image to OpenRouter using direct API call
 *
 * @param base64Image - data-URI base64 string (see imageToBase64)
 * @param question - prompt text sent together with the image
 */
async function sendImageDirectAPI(base64Image: string, question: string = "What's in this image?"): Promise<void> {
  try {
    console.log('Sending image via direct API call...');
    const response = await axios.post(
      'https://openrouter.ai/api/v1/chat/completions',
      {
        model: 'anthropic/claude-3-opus', // Choose an appropriate model with vision capabilities
        // OpenAI-style multimodal message: content list of text and image parts.
        messages: [
          {
            role: 'user',
            content: [
              {
                type: 'text',
                text: question
              },
              {
                type: 'image_url',
                image_url: {
                  url: base64Image
                }
              }
            ]
          }
        ]
      },
      {
        headers: {
          'Authorization': `Bearer ${OPENROUTER_API_KEY}`,
          'Content-Type': 'application/json',
          'HTTP-Referer': 'https://your-site-url.com', // Optional
          'X-Title': 'Your Site Name' // Optional
        }
      }
    );
    console.log('Response from direct API:');
    console.log(response.data.choices[0].message.content);
  } catch (error) {
    // Log the failure, including the server's body when one came back.
    console.error('Error sending image via direct API:', error);
    if (axios.isAxiosError(error) && error.response) {
      console.error('API error details:', error.response.data);
    }
  }
}
/**
 * Method 2: Send an image to OpenRouter using OpenAI SDK
 *
 * @param base64Image - data-URI base64 string (see imageToBase64)
 * @param question - prompt text sent together with the image
 */
async function sendImageOpenAISDK(base64Image: string, question: string = "What's in this image?"): Promise<void> {
  try {
    console.log('Sending image via OpenAI SDK...');
    // Initialize the OpenAI client with OpenRouter base URL
    const openai = new OpenAI({
      apiKey: OPENROUTER_API_KEY,
      baseURL: 'https://openrouter.ai/api/v1',
      defaultHeaders: {
        'HTTP-Referer': 'https://your-site-url.com', // Optional
        'X-Title': 'Your Site Name' // Optional
      }
    });
    // Create the message with text and image (OpenAI multimodal format).
    const completion = await openai.chat.completions.create({
      model: 'anthropic/claude-3-opus', // Choose an appropriate model with vision capabilities
      messages: [
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: question
            },
            {
              type: 'image_url',
              image_url: {
                url: base64Image
              }
            }
          ]
        }
      ]
    });
    console.log('Response from OpenAI SDK:');
    console.log(completion.choices[0].message.content);
  } catch (error) {
    // Best-effort demo helper: log and continue.
    console.error('Error sending image via OpenAI SDK:', error);
  }
}
/**
 * Main function to run both examples against the configured IMAGE_PATH.
 */
async function main(): Promise<void> {
  try {
    // Encode the image once and reuse it for each example.
    const base64Image = await imageToBase64(IMAGE_PATH);
    console.log('Image converted to base64 successfully');

    await sendImageDirectAPI(base64Image);  // Example 1: direct HTTP call
    await sendImageOpenAISDK(base64Image);  // Example 2: OpenAI SDK
  } catch (error) {
    console.error('Error in main function:', error);
  }
}

// Run the examples
main();

View File

@@ -15,7 +15,7 @@ class OpenRouterMultimodalServer {
constructor() {
// Retrieve API key and default model from environment variables
const apiKey = process.env.OPENROUTER_API_KEY;
const defaultModel = process.env.DEFAULT_MODEL || DEFAULT_MODEL;
const defaultModel = process.env.OPENROUTER_DEFAULT_MODEL || DEFAULT_MODEL;
// Check if API key is provided
if (!apiKey) {

View File

@@ -137,7 +137,7 @@ export class ToolHandlers {
properties: {
image_path: {
type: 'string',
description: 'Path to the image file to analyze (must be an absolute path)',
description: 'Path to the image file to analyze (can be an absolute file path, URL, or base64 data URL starting with "data:")',
},
question: {
type: 'string',
@@ -167,7 +167,7 @@ export class ToolHandlers {
properties: {
url: {
type: 'string',
description: 'URL or data URL of the image (use file:// URL prefix for local files, http(s):// for web images, or data: for base64 encoded images)',
description: 'URL or data URL of the image (use http(s):// for web images, absolute file paths for local files, or data:image/xxx;base64,... for base64 encoded images)',
},
alt: {
type: 'string',

View File

@@ -1,14 +1,32 @@
import path from 'path';
import { promises as fs } from 'fs';
import sharp from 'sharp';
import fetch from 'node-fetch';
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import OpenAI from 'openai';
import fetch from 'node-fetch';
import { findSuitableFreeModel } from './multi-image-analysis.js';
// Default model for image analysis
const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
let sharp: any;
try {
sharp = require('sharp');
} catch (e) {
console.error('Warning: sharp module not available, using fallback image processing');
// Mock implementation that just passes through the base64 data
sharp = (buffer: Buffer) => ({
metadata: async () => ({ width: 800, height: 600 }),
resize: () => ({
jpeg: () => ({
toBuffer: async () => buffer
})
}),
jpeg: () => ({
toBuffer: async () => buffer
})
});
}
export interface AnalyzeImageToolRequest {
image_path: string;
question?: string;
@@ -49,10 +67,34 @@ async function fetchImageAsBuffer(url: string): Promise<Buffer> {
}
}
/**
* Processes an image with minimal processing when sharp isn't available
*/
async function processImageFallback(buffer: Buffer): Promise<string> {
try {
// Just return the buffer as base64 without processing
return buffer.toString('base64');
} catch (error) {
console.error('Error in fallback image processing:', error);
throw error;
}
}
async function processImage(buffer: Buffer): Promise<string> {
try {
if (typeof sharp !== 'function') {
console.warn('Using fallback image processing (sharp not available)');
return processImageFallback(buffer);
}
// Get image metadata
const metadata = await sharp(buffer).metadata();
let metadata;
try {
metadata = await sharp(buffer).metadata();
} catch (error) {
console.warn('Error getting image metadata, using fallback:', error);
return processImageFallback(buffer);
}
// Calculate dimensions to keep base64 size reasonable
const MAX_DIMENSION = 800;
@@ -81,39 +123,56 @@ async function processImage(buffer: Buffer): Promise<string> {
return jpegBuffer.toString('base64');
} catch (error) {
console.error('Error processing image:', error);
throw error;
console.error('Error processing image, using fallback:', error);
return processImageFallback(buffer);
}
}
/**
* Converts the image at the given path to a base64 string
* Processes an image from a path or base64 string to a proper base64 format for APIs
*/
async function imageToBase64(imagePath: string): Promise<{ base64: string; mimeType: string }> {
async function prepareImage(imagePath: string): Promise<{ base64: string; mimeType: string }> {
try {
// Ensure the image path is absolute
if (!path.isAbsolute(imagePath)) {
throw new McpError(
ErrorCode.InvalidParams,
'Image path must be absolute'
);
// Check if already a base64 data URL
if (imagePath.startsWith('data:')) {
const matches = imagePath.match(/^data:([A-Za-z-+\/]+);base64,(.+)$/);
if (!matches || matches.length !== 3) {
throw new McpError(ErrorCode.InvalidParams, 'Invalid base64 data URL format');
}
return { base64: matches[2], mimeType: matches[1] };
}
// Check if image is a URL
if (imagePath.startsWith('http://') || imagePath.startsWith('https://')) {
try {
const buffer = await fetchImageAsBuffer(imagePath);
const processed = await processImage(buffer);
return { base64: processed, mimeType: 'image/jpeg' }; // We convert everything to JPEG
} catch (error: any) {
throw new McpError(ErrorCode.InvalidParams, `Failed to fetch image from URL: ${error.message}`);
}
}
// Handle file paths
let absolutePath = imagePath;
// Ensure the image path is absolute if it's a file path
if (!imagePath.startsWith('data:') && !path.isAbsolute(imagePath)) {
throw new McpError(ErrorCode.InvalidParams, 'Image path must be absolute');
}
// Check if the file exists
try {
await fs.access(imagePath);
// Check if the file exists
await fs.access(absolutePath);
} catch (error) {
throw new McpError(
ErrorCode.InvalidParams,
`File not found: ${imagePath}`
);
throw new McpError(ErrorCode.InvalidParams, `File not found: ${absolutePath}`);
}
// Read the file as a buffer
const buffer = await fs.readFile(imagePath);
const buffer = await fs.readFile(absolutePath);
// Determine MIME type from file extension
const extension = path.extname(imagePath).toLowerCase();
const extension = path.extname(absolutePath).toLowerCase();
let mimeType: string;
switch (extension) {
@@ -137,12 +196,11 @@ async function imageToBase64(imagePath: string): Promise<{ base64: string; mimeT
mimeType = 'application/octet-stream';
}
// Convert buffer to base64
const base64 = buffer.toString('base64');
return { base64, mimeType };
// Process and optimize the image
const processed = await processImage(buffer);
return { base64: processed, mimeType };
} catch (error) {
console.error('Error converting image to base64:', error);
console.error('Error preparing image:', error);
throw error;
}
}
@@ -160,23 +218,21 @@ export async function handleAnalyzeImage(
try {
// Validate inputs
if (!args.image_path) {
throw new McpError(ErrorCode.InvalidParams, 'An image path is required');
throw new McpError(ErrorCode.InvalidParams, 'An image path, URL, or base64 data is required');
}
if (!args.question) {
throw new McpError(ErrorCode.InvalidParams, 'A question about the image is required');
}
const question = args.question || "What's in this image?";
console.error(`Processing image: ${args.image_path}`);
console.error(`Processing image: ${args.image_path.substring(0, 100)}${args.image_path.length > 100 ? '...' : ''}`);
// Convert the image to base64
const { base64, mimeType } = await imageToBase64(args.image_path);
const { base64, mimeType } = await prepareImage(args.image_path);
// Create the content array for the OpenAI API
const content = [
{
type: 'text',
text: args.question
text: question
},
{
type: 'image_url',

View File

@@ -1,5 +1,6 @@
import fetch from 'node-fetch';
import sharp from 'sharp';
// Remove the sharp import to avoid conflicts with our dynamic import
// import sharp from 'sharp';
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import OpenAI from 'openai';
import path from 'path';
@@ -8,6 +9,26 @@ import { tmpdir } from 'os';
// Remove uuid import as we'll use a simple random string generator instead
// import { v4 as uuidv4 } from 'uuid';
// Setup sharp with fallback
let sharp: any;
try {
sharp = require('sharp');
} catch (e) {
console.error('Warning: sharp module not available, using fallback image processing');
// Mock implementation that just passes through the base64 data
sharp = (buffer: Buffer) => ({
metadata: async () => ({ width: 800, height: 600 }),
resize: () => ({
jpeg: () => ({
toBuffer: async () => buffer
})
}),
jpeg: () => ({
toBuffer: async () => buffer
})
});
}
// Default model for image analysis
const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';
@@ -149,13 +170,25 @@ async function fetchImageAsBuffer(url: string): Promise<Buffer> {
*/
async function processImage(buffer: Buffer, mimeType: string): Promise<string> {
try {
if (typeof sharp !== 'function') {
console.warn('Using fallback image processing (sharp not available)');
return processImageFallback(buffer, mimeType);
}
// Create a temporary directory for processing if needed
const tempDir = path.join(tmpdir(), `openrouter-mcp-${generateRandomId()}`);
await fs.mkdir(tempDir, { recursive: true });
// Get image info
let sharpInstance = sharp(buffer);
const metadata = await sharpInstance.metadata();
let metadata;
try {
metadata = await sharpInstance.metadata();
} catch (error) {
console.warn('Error getting image metadata, using fallback:', error);
return processImageFallback(buffer, mimeType);
}
// Skip processing for small images
if (metadata.width && metadata.height &&
@@ -177,19 +210,20 @@ async function processImage(buffer: Buffer, mimeType: string): Promise<string> {
}
}
// Convert to JPEG for consistency and small size
const processedBuffer = await sharpInstance
.jpeg({ quality: JPEG_QUALITY })
.toBuffer();
return processedBuffer.toString('base64');
try {
// Convert to JPEG for consistency and small size
const processedBuffer = await sharpInstance
.jpeg({ quality: JPEG_QUALITY })
.toBuffer();
return processedBuffer.toString('base64');
} catch (error) {
console.warn('Error in final image processing, using fallback:', error);
return processImageFallback(buffer, mimeType);
}
} catch (error) {
console.error('Error processing image:', error);
// If sharp processing fails, return the original buffer
// This is a fallback to ensure we don't completely fail on processing errors
console.error('Returning original image without processing');
return buffer.toString('base64');
console.error('Error processing image, using fallback:', error);
return processImageFallback(buffer, mimeType);
}
}
@@ -265,7 +299,7 @@ export async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
}
/**
* Process and analyze multiple images using OpenRouter
* Main handler for multi-image analysis
*/
export async function handleMultiImageAnalysis(
request: { params: { arguments: MultiImageAnalysisToolRequest } },
@@ -276,65 +310,50 @@ export async function handleMultiImageAnalysis(
try {
// Validate inputs
if (!args.images || args.images.length === 0) {
if (!args.images || !Array.isArray(args.images) || args.images.length === 0) {
throw new McpError(ErrorCode.InvalidParams, 'At least one image is required');
}
if (!args.prompt) {
throw new McpError(ErrorCode.InvalidParams, 'A prompt is required');
throw new McpError(ErrorCode.InvalidParams, 'A prompt for analyzing the images is required');
}
// Prepare content array for the message
const content: Array<any> = [{
type: 'text',
text: args.prompt
}];
console.error(`Processing ${args.images.length} images`);
// Track successful and failed images for reporting
const successfulImages = [];
const failedImages = [];
// Process each image
for (const [index, image] of args.images.entries()) {
try {
console.error(`Processing image ${index + 1}/${args.images.length}: ${image.url.substring(0, 50)}...`);
// Get MIME type
const mimeType = getMimeType(image.url);
// Fetch and process the image
const imageBuffer = await fetchImageAsBuffer(image.url);
const base64Image = await processImage(imageBuffer, mimeType);
// Use JPEG as the output format for consistency
const outputMimeType = 'image/jpeg';
// Add to content
content.push({
type: 'image_url',
image_url: {
url: `data:${outputMimeType};base64,${base64Image}`
// Process each image and convert to base64 if needed
const processedImages = await Promise.all(
args.images.map(async (image, index) => {
try {
// Skip processing if already a data URL
if (image.url.startsWith('data:')) {
console.error(`Image ${index + 1} is already in base64 format`);
return image;
}
});
successfulImages.push(image.url);
} catch (error) {
console.error(`Error processing image ${index + 1} (${image.url.substring(0, 30)}...):`, error);
failedImages.push({url: image.url, error: error instanceof Error ? error.message : String(error)});
// Continue with other images if one fails
}
}
// If no images were successfully processed
if (content.length === 1) {
const errorDetails = failedImages.map(img => `${img.url.substring(0, 30)}...: ${img.error}`).join('; ');
throw new Error(`Failed to process any of the provided images. Errors: ${errorDetails}`);
}
console.error(`Processing image ${index + 1}: ${image.url.substring(0, 100)}${image.url.length > 100 ? '...' : ''}`);
// Get MIME type
const mimeType = getMimeType(image.url);
// Fetch and process the image
const buffer = await fetchImageAsBuffer(image.url);
const base64 = await processImage(buffer, mimeType);
return {
url: `data:${mimeType === 'application/octet-stream' ? 'image/jpeg' : mimeType};base64,${base64}`,
alt: image.alt
};
} catch (error: any) {
console.error(`Error processing image ${index + 1}:`, error);
throw new Error(`Failed to process image ${index + 1}: ${image.url}. Error: ${error.message}`);
}
})
);
// Select model with priority:
// 1. User-specified model
// 2. Default model from environment
// 3. Default free vision model (qwen/qwen2.5-vl-32b-instruct:free)
// 3. Default free vision model
let model = args.model || defaultModel || DEFAULT_FREE_MODEL;
// If a model is specified but not our default free model, verify it exists
@@ -348,7 +367,30 @@ export async function handleMultiImageAnalysis(
}
console.error(`Making API call with model: ${model}`);
console.error(`Successfully processed ${successfulImages.length} images, ${failedImages.length} failed`);
// Build content array for the API call
const content: Array<{
type: string;
text?: string;
image_url?: {
url: string
}
}> = [
{
type: 'text',
text: args.prompt
}
];
// Add each processed image to the content array
processedImages.forEach(image => {
content.push({
type: 'image_url',
image_url: {
url: image.url
}
});
});
// Make the API call
const completion = await openai.chat.completions.create({
@@ -359,16 +401,19 @@ export async function handleMultiImageAnalysis(
}] as any
});
// Format the response
// Get response text and format if requested
let responseText = completion.choices[0].message.content || '';
// Add information about failed images if any
if (failedImages.length > 0) {
const formattedErrors = args.markdown_response !== false
? `\n\n---\n\n**Note:** ${failedImages.length} image(s) could not be processed:\n${failedImages.map((img, i) => `- Image ${i+1}: ${img.error}`).join('\n')}`
: `\n\nNote: ${failedImages.length} image(s) could not be processed: ${failedImages.map((img, i) => `Image ${i+1}: ${img.error}`).join('; ')}`;
responseText += formattedErrors;
// Format as markdown if requested
if (args.markdown_response) {
// Simple formatting enhancements
responseText = responseText
// Add horizontal rule after sections
.replace(/^(#{1,3}.*)/gm, '$1\n\n---')
// Ensure proper spacing for lists
.replace(/^(\s*[-*•]\s.+)$/gm, '\n$1')
// Convert plain URLs to markdown links
.replace(/(https?:\/\/[^\s]+)/g, '[$1]($1)');
}
// Return the analysis result
@@ -381,12 +426,10 @@ export async function handleMultiImageAnalysis(
],
metadata: {
model: completion.model,
usage: completion.usage,
successful_images: successfulImages.length,
failed_images: failedImages.length
usage: completion.usage
}
};
} catch (error) {
} catch (error: any) {
console.error('Error in multi-image analysis:', error);
if (error instanceof McpError) {
@@ -397,14 +440,27 @@ export async function handleMultiImageAnalysis(
content: [
{
type: 'text',
text: `Error analyzing images: ${error instanceof Error ? error.message : String(error)}`,
text: `Error analyzing images: ${error.message}`,
},
],
isError: true,
metadata: {
error_type: error instanceof Error ? error.constructor.name : 'Unknown',
error_message: error instanceof Error ? error.message : String(error)
error_type: error.constructor.name,
error_message: error.message
}
};
}
}
/**
* Processes an image with minimal processing when sharp isn't available
*/
async function processImageFallback(buffer: Buffer, mimeType: string): Promise<string> {
try {
// Just return the buffer as base64 without processing
return buffer.toString('base64');
} catch (error) {
console.error('Error in fallback image processing:', error);
throw error;
}
}

65
start-server.js Normal file
View File

@@ -0,0 +1,65 @@
// Load environment variables and start the MCP server
import { promises as fs } from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import { dirname } from 'path';
import { spawn } from 'child_process';
// Get current directory
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Path to .env file
const envPath = path.join(__dirname, '.env');
// Parse the contents of a .env file into a plain key/value object.
// Handles Windows CRLF line endings (the original split on '\n' only,
// which left a trailing '\r' in every value on Windows), skips blank
// and comment lines, trims whitespace, and strips matching single or
// double surrounding quotes.
function parseEnvContent(envContent) {
  const envVars = {};
  for (const rawLine of envContent.split(/\r?\n/)) {
    const line = rawLine.trim();
    // Skip blank lines and comments.
    if (!line || line.startsWith('#')) continue;
    const match = line.match(/^([\w.-]+)\s*=\s*(.*)$/);
    if (!match) continue;
    const key = match[1];
    let value = match[2].trim();
    // Remove matching surrounding quotes (single or double).
    if (value.length >= 2 &&
        ((value.startsWith('"') && value.endsWith('"')) ||
         (value.startsWith("'") && value.endsWith("'")))) {
      value = value.slice(1, -1);
    }
    envVars[key] = value;
  }
  return envVars;
}

/**
 * Load environment variables from the .env file next to this script,
 * export them into this process's environment, then start the MCP
 * server (dist/index.js) as a child process that inherits stdio.
 * Errors are logged rather than thrown so the launcher never crashes
 * with an unhandled rejection.
 */
async function loadEnvAndStartServer() {
  try {
    console.log('Loading environment variables from .env file...');
    const envContent = await fs.readFile(envPath, 'utf8');
    const envVars = parseEnvContent(envContent);
    // Make the parsed variables visible to this process as well.
    Object.assign(process.env, envVars);
    console.log('Environment variables loaded successfully');
    console.log(`API Key found: ${process.env.OPENROUTER_API_KEY ? 'Yes' : 'No'}`);
    // Start the server process with environment variables.
    console.log('Starting MCP server...');
    const serverProcess = spawn('node', ['dist/index.js'], {
      env: { ...process.env, ...envVars },
      stdio: 'inherit'
    });
    // Handle server process lifecycle events.
    serverProcess.on('close', (code) => {
      console.log(`MCP server exited with code ${code}`);
    });
    serverProcess.on('error', (err) => {
      console.error('Failed to start MCP server:', err);
    });
  } catch (error) {
    console.error('Error:', error);
  }
}
// Run the function
loadEnvAndStartServer();

155
test-openai-sdk.js Normal file
View File

@@ -0,0 +1,155 @@
import 'dotenv/config';
import { promises as fs } from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import { dirname } from 'path';
import { OpenAI } from 'openai';
// Get the directory name for ES modules
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Constants
const TEST_IMAGE_PATH = 'test.png'; // Adjust to your image path
/**
* Convert an image file to base64
*/
/**
 * Read an image file and return it as a base64 data URL.
 * The MIME type is inferred from the file extension; unrecognized
 * extensions fall back to application/octet-stream (with a log line).
 */
async function imageToBase64(filePath) {
  try {
    const imageBuffer = await fs.readFile(filePath);
    const fileExt = path.extname(filePath).toLowerCase();
    // Extension → MIME type lookup instead of a switch statement.
    const knownTypes = {
      '.png': 'image/png',
      '.jpg': 'image/jpeg',
      '.jpeg': 'image/jpeg',
      '.webp': 'image/webp'
    };
    let mimeType = knownTypes[fileExt];
    if (!mimeType) {
      mimeType = 'application/octet-stream';
      console.log(`Using default MIME type for extension: ${fileExt}`);
    }
    // Convert to base64 and add the data URI prefix.
    const base64 = imageBuffer.toString('base64');
    return `data:${mimeType};base64,${base64}`;
  } catch (error) {
    console.error('Error converting image to base64:', error);
    throw error;
  }
}
/**
* Send an image to OpenRouter using OpenAI SDK
*/
/**
 * Send a base64-encoded image to OpenRouter (via the OpenAI SDK) and
 * print the model's answer plus token-usage details.
 * Returns the raw chat-completion response; rethrows any API error
 * after logging whatever diagnostic detail is available.
 */
async function analyzeImageWithOpenRouter(base64Image, question = "What's in this image?") {
  try {
    console.log('Initializing OpenAI client with OpenRouter...');
    // OpenAI SDK pointed at the OpenRouter-compatible endpoint.
    const client = new OpenAI({
      apiKey: process.env.OPENROUTER_API_KEY,
      baseURL: 'https://openrouter.ai/api/v1',
      defaultHeaders: {
        'HTTP-Referer': 'https://github.com/stabgan/openrouter-mcp-multimodal',
        'X-Title': 'OpenRouter MCP Test'
      }
    });
    console.log('Sending image for analysis to Qwen free model...');
    // One user message combining the question with the image payload.
    const userContent = [
      { type: 'text', text: question },
      { type: 'image_url', image_url: { url: base64Image } }
    ];
    const completion = await client.chat.completions.create({
      model: 'qwen/qwen2.5-vl-32b-instruct:free', // free Qwen model with vision support
      messages: [{ role: 'user', content: userContent }]
    });
    // Dump the full response structure for debugging.
    console.log('\n----- Debug: API Response -----');
    console.log(JSON.stringify(completion, null, 2));
    console.log('----- End Debug -----\n');
    // Only read into the response if it has the expected shape.
    const message = completion?.choices?.[0]?.message;
    if (message) {
      console.log('\n----- Analysis Results -----\n');
      console.log(message.content);
      console.log('\n----------------------------\n');
      console.log('Model used:', completion.model);
      const usage = completion.usage;
      if (usage) {
        console.log('Token usage:');
        console.log('- Prompt tokens:', usage.prompt_tokens);
        console.log('- Completion tokens:', usage.completion_tokens);
        console.log('- Total tokens:', usage.total_tokens);
      }
    } else {
      console.log('Unexpected response structure from OpenRouter API.');
    }
    return completion;
  } catch (error) {
    console.error('Error analyzing image with OpenRouter:');
    if (error.response) {
      console.error('API error status:', error.status);
      console.error('API error details:', JSON.stringify(error.response, null, 2));
    } else if (error.cause) {
      console.error('Error cause:', error.cause);
    } else {
      console.error(error);
    }
    throw error;
  }
}
/**
* Main function
*/
/**
 * Test driver: convert the test image to a base64 data URL and run a
 * single image-analysis request against OpenRouter.
 * Exits the process with code 1 on any failure.
 */
async function main() {
  try {
    // Fail fast when the API key is missing.
    if (!process.env.OPENROUTER_API_KEY) {
      throw new Error('OPENROUTER_API_KEY not found in environment variables. Create a .env file with your API key.');
    }
    console.log(`Converting image at ${TEST_IMAGE_PATH} to base64...`);
    const dataUrl = await imageToBase64(TEST_IMAGE_PATH);
    console.log('Image converted successfully!');
    // Show only the start of the (potentially huge) base64 string.
    console.log('Base64 string preview:', dataUrl.slice(0, 100) + '...');
    await analyzeImageWithOpenRouter(dataUrl, "Please describe this image in detail. What do you see?");
  } catch (error) {
    console.error('Error in main function:', error);
    process.exit(1);
  }
}
// Run the script
main();

1
test.html Normal file
View File

@@ -0,0 +1 @@
<html><body><img src="test.png"></body></html>

BIN
test.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

1
test_base64.txt Normal file

File diff suppressed because one or more lines are too long

94
test_mcp_server.js Normal file
View File

@@ -0,0 +1,94 @@
// Test MCP server with image analysis
import { promises as fs } from 'fs';
import path from 'path';
// Path to test image
const IMAGE_PATH = process.argv[2] || 'test.png';
// Function to convert image to base64
/**
 * Convert an image file to a base64 data URL.
 * The MIME type is chosen by file extension; unrecognized extensions
 * fall back to image/png.
 */
async function imageToBase64(imagePath) {
  try {
    const imageBuffer = await fs.readFile(imagePath);
    const fileExt = path.extname(imagePath).toLowerCase();
    // Extension → MIME type lookup instead of a switch statement.
    const typeByExt = {
      '.png': 'image/png',
      '.jpg': 'image/jpeg',
      '.jpeg': 'image/jpeg',
      '.webp': 'image/webp'
    };
    const mimeType = typeByExt[fileExt] || 'image/png';
    // Convert to base64 and add the data URI prefix.
    return `data:${mimeType};base64,${imageBuffer.toString('base64')}`;
  } catch (error) {
    console.error('Error converting image to base64:', error);
    throw error;
  }
}
// Main function to test the MCP server
/**
 * Test driver: convert IMAGE_PATH to a base64 data URL and emit a
 * JSON-RPC analyze_image request on stdout (intended to be piped into
 * a running MCP server's stdin). Errors are logged, not rethrown.
 */
async function main() {
  try {
    console.log(`Converting image: ${IMAGE_PATH}`);
    // Bail out early if the image file is missing.
    try {
      await fs.access(IMAGE_PATH);
      console.log(`Image file exists: ${IMAGE_PATH}`);
    } catch {
      console.error(`Error: Image file does not exist: ${IMAGE_PATH}`);
      return;
    }
    const dataUrl = await imageToBase64(IMAGE_PATH);
    console.log('Image converted to base64 successfully.');
    console.log(`Base64 length: ${dataUrl.length} characters`);
    // JSON-RPC request targeting the analyze_image tool.
    const request = {
      jsonrpc: '2.0',
      id: '1',
      method: 'mcp/call_tool',
      params: {
        tool: 'mcp_openrouter_analyze_image',
        arguments: {
          image_path: dataUrl,
          question: "What's in this image?",
          model: 'qwen/qwen2.5-vl-32b-instruct:free'
        }
      }
    };
    console.log('Sending request to MCP server...');
    process.stdout.write(JSON.stringify(request) + '\n');
    console.log('Waiting for response...');
    // In a real application we would read the server's stdout stream;
    // here the response is left to the server's own logs.
    console.log('Request sent to MCP server. Check the server logs for the response.');
  } catch (error) {
    console.error('Error in main function:', error);
  }
}
// Run the main function
main().catch(error => {
console.error("Unhandled error in main:", error);
});