396 lines
15 KiB
TypeScript
396 lines
15 KiB
TypeScript
|
|
import { GoogleGenAI } from "@google/genai";
|
|
import dotenv from "dotenv";
|
|
|
|
// Populate process.env (via dotenv) before the API key is read below.
dotenv.config();

// Module-wide default Gemini client, built from the GEMINI_API_KEY environment variable.
const defaultApiKey = process.env.GEMINI_API_KEY as string;
const ai = new GoogleGenAI({ apiKey: defaultApiKey });
|
|
|
|
/** Outcome of planning a sticker set: the shared character description plus the per-sticker prompts. */
interface StickerPlan {
  /** Core visual description of the character that every variation is meant to share. */
  characterCore: string;
  /** Generation prompts, one entry per sticker variation. */
  prompts: string[];
}
|
|
|
|
/** Model used for every request issued by this service. */
const GEMINI_MODEL = "gemini-2.0-flash";

/** Matches the `data:image/...;base64,` header of an image data URL and captures its MIME type. */
const IMAGE_DATA_URL_HEADER = /^data:(image\/[\w.+-]+);base64,/;

/** Score dimensions that must always be present (as arrays) in a scorecard's `improvements`. */
const IMPROVEMENT_KEYS = ["dopamine", "serotonin", "cognitiveEase", "commercialFit"] as const;

/** Minimal view of a generateContent response: only the fields this file reads. */
interface TextResponseLike {
  candidates?: Array<{ content?: { parts?: Array<{ text?: string }> } }>;
}

/** Extracts a printable message from anything that was thrown. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** True for non-null, non-array objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** True for strings that contain at least one non-whitespace character. */
function isNonEmptyString(value: unknown): value is string {
  return typeof value === "string" && value.trim().length > 0;
}

/** Returns the per-call client when an API key is supplied, otherwise the shared module client. */
function clientFor(apiKey?: string) {
  return apiKey ? new GoogleGenAI({ apiKey }) : ai;
}

/** Concatenates the text parts of the first candidate; `undefined` when there is no text at all. */
function responseText(response: TextResponseLike): string | undefined {
  const parts = response.candidates?.[0]?.content?.parts ?? [];
  const text = parts.map((part) => part.text ?? "").join("");
  return text.length > 0 ? text : undefined;
}

/**
 * Parses model output that is supposed to be a single JSON object.
 * Falls back to the outermost `{...}` span when the model wrapped the JSON in extra text.
 * @throws Error when no JSON can be parsed or the parsed value is not an object.
 */
function parseJsonObject(text: string): Record<string, unknown> {
  let parsed: unknown;
  try {
    parsed = JSON.parse(text);
  } catch {
    const embedded = text.match(/\{[\s\S]*\}/);
    if (!embedded) {
      throw new Error("Could not parse JSON from response");
    }
    parsed = JSON.parse(embedded[0]);
  }
  if (!isRecord(parsed)) {
    throw new Error("Expected a JSON object in response");
  }
  return parsed;
}

/**
 * Splits an image given either as a data URL or as bare base64.
 * The MIME type comes from the data URL header; bare base64 is assumed to be JPEG.
 */
function splitImageData(imageBase64: string): { mimeType: string; data: string } {
  const header = IMAGE_DATA_URL_HEADER.exec(imageBase64);
  if (!header) {
    return { mimeType: "image/jpeg", data: imageBase64 };
  }
  return { mimeType: header[1] ?? "image/jpeg", data: imageBase64.slice(header[0].length) };
}

/**
 * Produces exactly `count` prompts: trims when there are too many and cycles through the
 * available ones (round-robin) when there are too few. Uses `fallback` when none are available.
 */
function fitToCount(variations: string[], count: number, fallback: string): string[] {
  const pool = variations.length > 0 ? variations : [fallback];
  return Array.from({ length: count }, (_, index) => pool[index % pool.length] ?? fallback);
}

/** Sends a prompt plus one inline image and returns the model's JSON object reply. */
async function requestImageAnalysis(
  prompt: string,
  imageBase64: string,
  apiKey?: string
): Promise<Record<string, unknown>> {
  const { mimeType, data } = splitImageData(imageBase64);
  const response = await clientFor(apiKey).models.generateContent({
    model: GEMINI_MODEL,
    contents: [
      {
        role: "user",
        parts: [{ text: prompt }, { inlineData: { mimeType, data } }],
      },
    ],
    config: { responseMimeType: "application/json" },
  });

  const text = responseText(response);
  if (!text) throw new Error("No response from AI");
  return parseJsonObject(text);
}

/**
 * Builds the `improvements` map of a scorecard so that every key in IMPROVEMENT_KEYS is an array.
 * When `improvements` is missing, the legacy `criticalImprovements` list is used for `commercialFit`.
 */
function normalizeImprovements(data: Record<string, unknown>): Record<string, unknown> {
  const current = data.improvements;
  if (!isRecord(current)) {
    console.warn("[GeminiService] AI returned old format, normalizing...");
    return {
      dopamine: [],
      serotonin: [],
      cognitiveEase: [],
      commercialFit: Array.isArray(data.criticalImprovements) ? data.criticalImprovements : [],
    };
  }

  const normalized: Record<string, unknown> = { ...current };
  for (const key of IMPROVEMENT_KEYS) {
    if (!Array.isArray(normalized[key])) {
      normalized[key] = [];
    }
  }
  return normalized;
}

/** Derives a human-readable title from the first long, dot-free path segment of a URL. */
function titleFromUrl(url: string): string {
  // Drop the query string / fragment so they do not end up inside the title.
  const path = url.split(/[?#]/, 1)[0] ?? url;
  const slug = path.split("/").find((segment) => segment.length > 10 && !segment.includes("."));
  return slug ? slug.replace(/-/g, " ") : "Unknown Product";
}

export const geminiService = {
  /**
   * The Brain: Analyzes the user's prompt and generates a list of distinct scenarios.
   *
   * Never rejects: when planning fails for any reason, the original prompt is repeated instead.
   *
   * @param originalPrompt The user's raw input
   * @param count Number of variations to generate (floored; negative or non-finite values yield none)
   * @returns Exactly `count` prompts and the core character description
   */
  async generateStickerSetPlan(originalPrompt: string, count: number): Promise<StickerPlan> {
    console.log(`[GeminiService] Planning ${count} sticker variations...`);

    // Array.from / slicing need a non-negative integer; a raw negative count used to throw in the fallback.
    const target = Number.isFinite(count) ? Math.max(0, Math.floor(count)) : 0;

    const planningPrompt = `
      You are an expert Sticker Set Planner.
      Analyze this sticker prompt: "${originalPrompt}"

      1. Extract the CORE VISUAL IDENTITY (Character description, style, colors, clothes).
         This must be preserved EXACTLY in every variation.

      2. Generate ${count} DISTINCT variation prompts.
         Each variation must be a different pose, emotion, or action suitable for a sticker pack.
         (e.g., Happy, Sad, Thinking, Coffee, Running, Sleeping, Winking, etc.)

      Output JSON ONLY:
      {
        "characterCore": "The extracted core visual description...",
        "variations": [
          "Full prompt for variation 1...",
          "Full prompt for variation 2..."
        ]
      }

      Rules for Variations:
      - Combine the "Character Core" with the new pose/action.
      - Ensure the output is a FULL stable diffusion style prompt (tags).
      - Keep the style tags consistent.
    `;

    try {
      const response = await ai.models.generateContent({
        model: GEMINI_MODEL,
        contents: [{ text: planningPrompt }],
        config: { responseMimeType: "application/json" },
      });

      const text = responseText(response);
      if (!text) throw new Error("No response from Gemini");

      const plan = parseJsonObject(text);

      // Keep only usable string prompts; anything else from the model is discarded.
      const variations = Array.isArray(plan.variations) ? plan.variations.filter(isNonEmptyString) : [];

      if (variations.length < target) {
        console.warn(`[GeminiService] AI returned only ${variations.length}/${count} variations. Padding...`);
      }

      return {
        prompts: fitToCount(variations, target, originalPrompt),
        characterCore: isNonEmptyString(plan.characterCore) ? plan.characterCore : originalPrompt,
      };
    } catch (error: unknown) {
      console.error("Gemini Planning Error:", error);
      // Deliberate best-effort: repeat the original prompt rather than fail the whole request.
      return {
        prompts: fitToCount([], target, originalPrompt),
        characterCore: originalPrompt,
      };
    }
  },

  /**
   * X-Ray: Asks the model for the visual DNA, strategic gaps, and a superior prompt
   * for a competitor product.
   *
   * @param params.title Product title inserted into the prompt
   * @param params.description Product description; only the first 500 characters are sent
   * @param params.imageBase64 Product image as a data URL or bare base64 (bare base64 is sent as JPEG)
   * @param params.apiKey Optional key; when given, a dedicated client is used instead of the shared one
   * @returns The JSON object produced by the model
   * @throws Error prefixed with "Failed to analyze product: " when the request or parsing fails
   */
  async analyzeCompetitorProduct(params: {
    title: string;
    description: string;
    imageBase64: string;
    apiKey?: string;
  }): Promise<any> {
    console.log(`[GeminiService] Running Competitor X-Ray Analysis...`);
    const { title, description, imageBase64, apiKey } = params;

    const analysisPrompt = `
      You are an elite E-commerce Strategist and Art Director.
      Analyze this competitor product to help us create something SUPERIOR.

      PRODUCT CONTEXT:
      Title: "${title}"
      Description: "${(description ?? "").substring(0, 500)}..."

      TASK:
      1. VISUAL DNA: Deconstruct the aesthetic formula (Color palette info, composition style, emotional triggers).
      2. SENTIMENT GAP: Identify what is missing or could be improved (e.g., "Lighting is too flat", "Composition is cluttered").
      3. SUPERIOR PROMPT: Write a "Nano Banana" style stable diffusion prompt to generate a version of this product that is 10x BETTER.
         - Must use (weighted:1.2) tags.
         - Must include quality boosters.
         - Must solve the identified gaps.

      OUTPUT JSON ONLY:
      {
        "visualDna": ["Tag 1", "Tag 2", "Hex Colors", "Composition Rule"],
        "sentimentGap": "Brief strategic analysis of weaknesses...",
        "superiorPrompt": "(masterpiece:1.4), ...",
        "gapAnalysis": "Detailed explanation of why the new prompt is better"
      }
    `;

    try {
      return await requestImageAnalysis(analysisPrompt, imageBase64, apiKey);
    } catch (error: unknown) {
      console.error("Gemini X-Ray Error:", error);
      throw new Error("Failed to analyze product: " + errorMessage(error));
    }
  },

  /**
   * Neuro-Scorecard: Analyzes an image for commercial potential using neuro-marketing principles.
   *
   * @param params.imageBase64 Image as a data URL or bare base64 (bare base64 is sent as JPEG)
   * @param params.apiKey Optional key; when given, a dedicated client is used instead of the shared one
   * @returns The model's JSON object, with `improvements` guaranteed to hold an array for each of
   *          dopamine, serotonin, cognitiveEase and commercialFit
   * @throws Error prefixed with "Failed to score image: " when the request or parsing fails
   */
  async analyzeImageNeuroScore(params: {
    imageBase64: string;
    apiKey?: string;
  }): Promise<any> {
    console.log(`[GeminiService] Running Neuro-Scorecard Analysis...`);
    const { imageBase64, apiKey } = params;

    const analysisPrompt = `
      You are a Neuromarketing Expert and Senior Art Director.
      Score this image based on its potential to sell on Etsy.

      ANALYZE THESE DIMENSIONS (Score 0-10):
      1. **Dopamine Hit**: Does it create immediate excitement/craving?
      2. **Serotonin Flow**: Does it evoke trust, calm, or belonging?
      3. **Cognitive Ease**: Is it easy to process instantly? (High score = distinct subject, clear lighting).
      4. **Commercial Fit**: Does it look like a high-end product vs. an amateur photo?

      CRITICAL INSTRUCTION FOR "IMPROVEMENTS":
      - YOU MUST PROVIDE EXACTLY 2 SPECIFIC IMPROVEMENTS PER CATEGORY.
      - EVEN IF THE SCORE IS 10/10, suggest experimental tweaks.
      - Do NOT give generic advice like "increase contrast".
      - BE SPECIFIC to the image content. Mention specific objects, colors, or areas.

      ⚠️ FORBIDDEN SUGGESTIONS (NEVER SUGGEST THESE):
      - Mockups (e.g., "show this in a living room", "place on a wall")
      - Context/environment changes (e.g., "add a frame", "show as wall art")
      - Marketing/presentation ideas (e.g., "include in a bundle", "show lifestyle shot")

      ✅ ALLOWED SUGGESTIONS (ONLY THESE TYPES):
      - Color adjustments (saturation, hue, warmth, vibrancy)
      - Lighting changes (exposure, shadows, highlights, contrast)
      - Composition tweaks (crop, reframe, balance, focal point)
      - Detail enhancements (sharpness, texture, remove artifacts)
      - Style refinements (artistic filters, mood adjustments, grain)

      - Example GOOD: "Increase the saturation of the red vase to make it pop."
      - Example GOOD: "Add subtle vignette to draw eye to center."
      - Example BAD: "Put this in a living room mockup." ❌
      - NEVER RETURN AN EMPTY ARRAY.

      OUTPUT JSON ONLY:
      {
        "scores": {
          "dopamine": 8.5,
          "serotonin": 7.0,
          "cognitiveEase": 9.0,
          "commercialFit": 6.5
        },
        "feedback": [
          "Positive point 1",
          "Positive point 2"
        ],
        "improvements": {
          "dopamine": ["Specific fix 1", "Specific fix 2"],
          "serotonin": ["Specific fix 1", "Specific fix 2"],
          "cognitiveEase": ["Specific fix 1", "Specific fix 2"],
          "commercialFit": ["Specific fix 1", "Specific fix 2"]
        },
        "prediction": "High/Medium/Low Conversion Potential"
      }
    `;

    try {
      const data = await requestImageAnalysis(analysisPrompt, imageBase64, apiKey);
      // Callers index improvements by dimension, so every dimension must be an array.
      data.improvements = normalizeImprovements(data);
      return data;
    } catch (error: unknown) {
      console.error("Gemini Neuro-Score Error:", error);
      throw new Error("Failed to score image: " + errorMessage(error));
    }
  },

  /**
   * Web Research: Uses Google Search Grounding to extract metadata from a URL.
   * Bypasses local IP blocking by using Google's servers.
   *
   * If the grounded request throws, the same prompt is retried once without grounding.
   *
   * @param url Product page URL to research
   * @param apiKey Optional key; when given, a dedicated client is used instead of the shared one
   * @returns Title (derived from the URL when the model gives none), description ("" when missing)
   *          and image URL ("" unless it is an http(s) URL)
   * @throws Error prefixed with "Failed to research url: " when both requests fail or no JSON is returned
   */
  async performWebResearch(url: string, apiKey?: string): Promise<{ title: string, description: string, image: string }> {
    console.log(`[GeminiService] Performing Google Search Grounding for: ${url}`);

    const researchPrompt = `
      Analyze this product URL: "${url}"

      TASK:
      Extract the following metadata from this product page:
      1. Product Title (Exact full title)
      2. Product Description (First 2-3 sentences summary)
      3. Main Product Image URL (Direct link to the highest resolution image, must be a full URL starting with https://)

      IMPORTANT: You MUST return valid JSON. Do not include any text before or after the JSON.

      OUTPUT FORMAT (JSON ONLY):
      {
        "title": "Product Title Here",
        "description": "Product Description Here...",
        "image": "https://full-url-to-image.jpg"
      }
    `;

    try {
      const client = clientFor(apiKey);

      // First try with Google Search grounding
      let response: TextResponseLike;
      try {
        response = await client.models.generateContent({
          model: GEMINI_MODEL,
          contents: [{ text: researchPrompt }],
          // Tools must live inside `config`; a top-level `tools` field is not part of the request type.
          // NOTE(review): JSON response mode is left off here because it is reportedly rejected when
          // combined with the search tool on this model - confirm against the current API docs.
          // parseJsonObject() below copes with a free-text reply.
          config: { tools: [{ googleSearch: {} }] },
        });
      } catch (searchError: unknown) {
        console.warn("[GeminiService] Google Search grounding failed, trying without:", errorMessage(searchError));
        // Fallback: Try without search grounding
        response = await client.models.generateContent({
          model: GEMINI_MODEL,
          contents: [{ text: researchPrompt }],
          config: { responseMimeType: "application/json" },
        });
      }

      const text = responseText(response);
      console.log("[GeminiService] Raw research response:", text?.substring(0, 200));

      if (!text) throw new Error("No response from AI Research");

      // Handles both pure JSON and JSON surrounded by extra text
      const data = parseJsonObject(text);

      let title: string;
      if (isNonEmptyString(data.title)) {
        title = data.title;
      } else {
        console.warn("[GeminiService] Missing title in response, extracting from URL...");
        title = titleFromUrl(url);
      }

      let image = "";
      if (typeof data.image === "string" && /^https?:\/\//i.test(data.image)) {
        image = data.image;
      } else {
        console.warn("[GeminiService] Invalid or missing image URL");
      }

      const description = typeof data.description === "string" ? data.description : "";

      console.log(`[GeminiService] Research Success: ${title}`);
      // Extra fields returned by the model are passed through unchanged.
      return { ...data, title, description, image };
    } catch (error: unknown) {
      console.error("Gemini Research Error:", error);
      throw new Error("Failed to research url: " + errorMessage(error));
    }
  }
};
|