import { GoogleGenAI, GenerateContentResponse, GenerateImagesResponse } from "@google/genai";
import { GEMINI_IMAGE_MODEL, GEMINI_TEXT_MODEL, PROMPT_STYLES } from '../constants';
import type { TextRefinementOutput, InternalRefinementOutput } from "../types";
/**
 * Identifies which base prompt template is used when an image prompt is
 * constructed.
 */
export enum PatentDrawingStyle {
  /** Selects the "iterative initial" base template. */
  ITERATIVE_INITIAL = 0,
  /** Selects the "iterative auto-refine" base template. */
  ITERATIVE_AUTO_REFINE = 1,
}
/**
 * Result of scanning a user description for figure references and labels,
 * as returned by `extractFigureLabelInstructions`.
 */
interface ExtractedFigureLabelInfo {
/** Human-readable guidance sentence(s) meant to be embedded into an AI prompt. */
instructionsText: string;
/** Figure identifier judged to be the primary one, or `null` when none was identified. */
primaryFigureNum: string | null;
/** De-duplicated candidate figure/label strings pulled out of the description. */
rawExtractedElements: string[];
}
// Shared Gemini client for this module. It stays `null` when initialisation
// fails, so each exported API function checks it before use instead of the
// module import crashing.
let ai: GoogleGenAI | null = null;
try {
  const apiKey = process.env.API_KEY;
  if (!apiKey) {
    throw new Error("API_KEY environment variable is not set.");
  }
  ai = new GoogleGenAI({ apiKey });
} catch (initError) {
  // Initialisation problems are logged rather than rethrown.
  console.error("Failed to initialize GoogleGenAI:", initError);
}
/**
 * Scans a free-text description for figure references ("Figure 3", "FIG. 2A")
 * and candidate reference labels ("item 12", "housing (14)", "A-10"), and
 * turns them into guidance text for the AI prompts.
 *
 * Processing steps:
 *  1. Bracketed numeric markers such as "[0012]" are removed.
 *  2. Number+unit measurements ("5 mm", "3.3 V") are removed so they are not
 *     mistaken for reference numerals. A number that directly follows a
 *     figure keyword is left alone, so designators such as "FIG. 1A" survive.
 *  3. Figure and label patterns are matched and filtered heuristically.
 *  4. The most frequently mentioned figure identifier becomes the primary
 *     figure (first mention wins ties).
 *
 * The extraction is deliberately permissive (e.g. "figure shows" yields the
 * candidate "shows"); the generated text tells the AI to discern real labels.
 *
 * @param userInput Raw description text supplied by the user.
 * @returns Guidance text, the primary figure identifier (or `null`), and the
 *          de-duplicated list of extracted candidates in discovery order.
 */
export const extractFigureLabelInstructions = (userInput: string): ExtractedFigureLabelInfo => {
  // Group 1 is the figure identifier that follows "figure"/"fig"/"fig.".
  const figurePattern = /\b(?:figure|fig)\s*\.?\s*([\w\d]+(?:[a-zA-Z](?![a-zA-Z]))?)\b/gi;
  // Keyword-introduced labels, parenthesised labels, and bare reference numerals.
  const labelPatterns = [
    /\b(?:label|item|part|element|ref|reference|indicator|designator)\s+([a-zA-Z0-9][\w.-]*[a-zA-Z0-9]?)\b/gi,
    /\b[a-zA-Z][\w\s-]*[a-zA-Z0-9]\s*\(([a-zA-Z0-9][\w.-]*[a-zA-Z0-9]?)\)/gi,
    /\b([A-Z]+-?\d+[a-zA-Z]?|\d+[a-zA-Z]?-?[A-Z]?)(?![a-zA-Z0-9-])\b/gi,
  ];
  // The leading negative lookbehind keeps "FIG. 1A" / "Figure 2 m..." from being
  // read as "1 ampere" / "2 metres" and deleted before figure matching runs.
  // NOTE: the unit match is case-insensitive, so other numerals with a
  // unit-like letter suffix (e.g. "12a") are still stripped here.
  const measurementPattern = /(?<!\b(?:figure|fig)\s*\.?\s*)\b\d+(\.\d+)?\s*(?:K|kHz|MHz|GHz|μs|ms|ns|microseconds|milliseconds|seconds|min|hr|C|F|Kelvin|Pa|kPa|MPa|bar|psi|N|kN|V|mV|kV|A|mA|μA|W|mW|kW|Hz|Ω|kΩ|MΩ|dB|lm|cd|mol|g|kg|mg|μg|L|mL|mm|cm|m|km)\b/gi;

  const cleanedInput = userInput
    .replace(/\[\d{2,}\]/g, '')
    .replace(measurementPattern, '');

  // Heuristic filter shared by every pattern; `fullMatch` is the whole regex match.
  const isPlausibleCandidate = (candidate: string, fullMatch: string): boolean => {
    const candidateLower = candidate.toLowerCase();
    const fullMatchLower = fullMatch.toLowerCase();
    if (candidate.length < 1) return false;
    // Single characters must be alphanumeric (rejects e.g. "_").
    if (candidate.length === 1 && !/^[A-Za-z0-9]$/.test(candidate)) return false;
    // Very long captures are almost certainly prose, not labels.
    if (candidate.length > 30 && !candidateLower.startsWith("fig")) return false;
    // Bare numbers above 999 are only accepted as figure numbers.
    if (/^\d+$/.test(candidate) && parseInt(candidate, 10) > 999 && !fullMatchLower.startsWith("fig")) return false;
    // Capitalised ordinary words are rejected unless tied to a figure/label keyword.
    if (/^[A-Z][a-z]+$/.test(candidate) && candidate.length > 2 && !fullMatchLower.includes("fig") && !fullMatchLower.includes("label")) return false;
    return true;
  };

  const instructionsSet = new Set<string>();
  const figureMentionCounts = new Map<string, number>();
  let primaryFigureNum: string | null = null;

  // Figure references come first so they lead the extracted list.
  for (const match of cleanedInput.matchAll(figurePattern)) {
    const figureId = (match[1] || match[0]).trim();
    if (!isPlausibleCandidate(figureId, match[0])) continue;
    instructionsSet.add(figureId);
    // Only identifiers containing a digit count as figure numbers; this keeps
    // captures like "shows" (from "figure shows") out of the tally.
    if (!/\d/.test(figureId)) continue;
    const mentions = (figureMentionCounts.get(figureId) ?? 0) + 1;
    figureMentionCounts.set(figureId, mentions);
    if (primaryFigureNum === null || mentions > (figureMentionCounts.get(primaryFigureNum) ?? 0)) {
      primaryFigureNum = figureId;
    }
  }

  for (const pattern of labelPatterns) {
    for (const match of cleanedInput.matchAll(pattern)) {
      const candidate = (match[1] || match[0]).trim();
      if (isPlausibleCandidate(candidate, match[0])) {
        instructionsSet.add(candidate);
      }
    }
  }

  const uniqueInstructions = Array.from(instructionsSet);

  let fullInstructionsText = uniqueInstructions.length === 0
    ? "No specific figure numbers or explicit labels were parsed from the description. AI should ensure no text is added to the drawing unless a figure number is clearly implied by the main description."
    : `Potential Figure/Label Information (AI must discern actual labels/figures to render): "${uniqueInstructions.join("; ")}".`;
  if (primaryFigureNum) {
    fullInstructionsText += ` The primary figure number identified appears to be '${primaryFigureNum}'. AI should focus on rendering this figure number if applicable.`;
  }

  return {
    instructionsText: fullInstructionsText,
    primaryFigureNum: primaryFigureNum,
    rawExtractedElements: uniqueInstructions
  };
};
/**
 * Builds the INITIAL image-generation prompt by filling a base template from
 * `PROMPT_STYLES`. Later prompts are produced by
 * `refineInternalImageGenInstructions` rather than by this function.
 *
 * Placeholders are filled in a single pass with a replacer function, so:
 *  - `$&`, `$'`, `` $` `` and `$$` inside user text are inserted literally
 *    instead of being interpreted as replacement patterns, and
 *  - placeholder-looking tokens inside user text are not expanded again.
 *
 * @param subjectDescriptionForThisIteration Description to draw; either the
 *        original text or a co-iterated revision of it.
 * @param originalUserFullDescription The very first user input; figure and
 *        label context is always extracted from this text.
 * @param style Selects the base template; unknown values fall back to the
 *        "iterative initial" template.
 * @param iterationContext Current and maximum iteration numbers to embed.
 * @returns The fully substituted prompt text.
 */
export const constructInitialImagePrompt = (
  subjectDescriptionForThisIteration: string,
  originalUserFullDescription: string,
  style: PatentDrawingStyle,
  iterationContext: { currentIteration: number, maxIterations: number }
): string => {
  const { instructionsText: figureAndLabelContext, primaryFigureNum } = extractFigureLabelInstructions(originalUserFullDescription);

  let basePromptTemplate: string;
  switch (style) {
    case PatentDrawingStyle.ITERATIVE_INITIAL:
      basePromptTemplate = PROMPT_STYLES.ITERATIVE_INITIAL_BASE;
      break;
    case PatentDrawingStyle.ITERATIVE_AUTO_REFINE:
      // Should ideally not be reached when prompts are refined internally.
      basePromptTemplate = PROMPT_STYLES.ITERATIVE_AUTO_REFINE_BASE;
      break;
    default:
      basePromptTemplate = PROMPT_STYLES.ITERATIVE_INITIAL_BASE;
  }

  const placeholderValues: Record<string, string> = {
    subjectDescription: subjectDescriptionForThisIteration,
    figureAndLabelContextBlock: figureAndLabelContext,
    currentIteration: iterationContext.currentIteration.toString(),
    maxIterations: iterationContext.maxIterations.toString(),
    primaryFigureIdentifierIfAny: primaryFigureNum ? `Figure ${primaryFigureNum}` : 'the main described figure',
  };

  return basePromptTemplate.replace(
    /\{(subjectDescription|figureAndLabelContextBlock|currentIteration|maxIterations|primaryFigureIdentifierIfAny)\}/g,
    (placeholder: string, key: string) => placeholderValues[key] ?? placeholder
  );
};
/**
 * Requests a single PNG image from the image model for an already-built prompt.
 *
 * @param fullApiPromptForImage Fully constructed (and possibly refined) prompt.
 * @param currentIterationNum Iteration number; used only in the log line.
 * @param maxIterations Iteration limit; used only in the log line.
 * @returns A base64 `data:` URL for the image plus the prompt that was sent.
 * @throws Error when the client is not initialised, when the response carries
 *         no image bytes, or when the API call fails (API-key and quota
 *         failures get dedicated messages).
 */
export const generatePatentDrawing = async (
  fullApiPromptForImage: string,
  currentIterationNum: number,
  maxIterations: number
): Promise<{ imageUrl: string, fullPromptUsed: string }> => {
  if (!ai) {
    throw new Error("Gemini API client is not initialized. Check API_KEY setup.");
  }

  try {
    console.log(`Generating image, Iteration: ${currentIterationNum}/${maxIterations}`);
    const response: GenerateImagesResponse = await ai.models.generateImages({
      model: GEMINI_IMAGE_MODEL,
      prompt: fullApiPromptForImage,
      config: {
        numberOfImages: 1,
        outputMimeType: 'image/png',
      },
    });

    const candidates = response.generatedImages;
    const imageBytes = candidates && candidates.length > 0
      ? candidates[0].image?.imageBytes
      : undefined;

    if (!imageBytes) {
      // Thrown inside the try on purpose: the catch below re-wraps it like any
      // other failure of this call.
      console.error("API response missing image data:", response);
      throw new Error("No image data received from the API. The response might be empty or malformed.");
    }

    return {
      imageUrl: `data:image/png;base64,${imageBytes}`,
      fullPromptUsed: fullApiPromptForImage,
    };
  } catch (error: any) {
    console.error('Error generating image with Gemini API:', error);
    const message = error.message;

    if (message && message.includes("API_KEY")) {
      throw new Error(`API Key error: ${message}. Please ensure your API_KEY is valid and has permissions for the Imagen API.`);
    }
    if (message && message.toLowerCase().includes("quota")) {
      throw new Error("API quota exceeded. Please check your Gemini API quota limits.");
    }

    const responseData = error.response && error.response.data;
    if (responseData) {
      console.error("Gemini API Error Response Data:", responseData);
    }
    throw new Error(`Gemini API error during image generation: ${message || 'Unknown error'}`);
  }
};
/**
 * Asks the text model to refine the user's description for a patent drawing
 * and to list any clarification requests.
 *
 * API and parsing problems never reject: the function resolves with the
 * unchanged `currentTextDescription` and the problem reported as the single
 * entry of `clarificationRequests`. The only throw is for a missing client.
 *
 * Template placeholders are filled in one pass with a replacer function, so
 * every occurrence is substituted, `$` sequences in user text are inserted
 * literally, and placeholder-like tokens in user text are not re-expanded.
 *
 * @param currentTextDescription Text to refine in this round.
 * @param originalUserDescription First user input; source of the figure/label context.
 * @param textIteration Iteration number embedded in the prompt and the log line.
 * @returns Refined text (or the input text on failure), clarification
 *          requests, and the exact prompt sent to the model.
 * @throws Error when the Gemini client was not initialised.
 */
export const refineTextDescriptionForPatentDrawing = async (
  currentTextDescription: string,
  originalUserDescription: string,
  textIteration: number
): Promise<TextRefinementOutput> => {
  if (!ai) throw new Error("Gemini API client not initialized.");

  const { instructionsText: figureAndLabelContext } = extractFigureLabelInstructions(originalUserDescription);
  const placeholderValues: Record<string, string> = {
    currentTextDescription: currentTextDescription,
    figureAndLabelContextBlock: figureAndLabelContext,
    textIteration: textIteration.toString(),
  };
  const promptForTextAPI = PROMPT_STYLES.TEXT_COITERATION_REFINE_PROMPT.replace(
    /\{(currentTextDescription|figureAndLabelContextBlock|textIteration)\}/g,
    (placeholder: string, key: string) => placeholderValues[key] ?? placeholder
  );

  const fallbackResult = (errorMsg: string): TextRefinementOutput => ({
    refinedText: currentTextDescription,
    clarificationRequests: [errorMsg],
    fullPromptUsed: promptForTextAPI
  });

  try {
    console.log(`Refining user text description, input for Text Iteration: ${textIteration}`);
    const response: GenerateContentResponse = await ai.models.generateContent({
      model: GEMINI_TEXT_MODEL,
      contents: promptForTextAPI,
      config: { responseMimeType: "application/json" }
    });

    // `text` can be absent on an empty response; treat that as an empty
    // payload so it is reported as malformed JSON, not as an API call failure.
    let jsonStr = (response.text ?? '').trim();
    // Unwrap a Markdown code fence if the model wrapped its JSON in one.
    const fenceMatch = jsonStr.match(/^```(\w*)?\s*\n?(.*?)\n?\s*```$/s);
    if (fenceMatch && fenceMatch[2]) jsonStr = fenceMatch[2].trim();

    let parsedData: unknown;
    try {
      parsedData = JSON.parse(jsonStr);
    } catch (e) {
      console.error("Failed to parse JSON from text co-iteration API:", e, "\nRaw response:", jsonStr);
      return fallbackResult("Text co-iteration AI returned non-JSON or malformed JSON. Using previous text.");
    }

    // Valid JSON can still be `null` or a primitive; only objects are inspected.
    if (parsedData !== null && typeof parsedData === 'object') {
      const { refinedDescription, clarificationRequests } =
        parsedData as { refinedDescription?: unknown; clarificationRequests?: unknown };
      if (typeof refinedDescription === 'string' && Array.isArray(clarificationRequests)) {
        return {
          // An empty refinement keeps the previous text.
          refinedText: refinedDescription.trim() || currentTextDescription,
          clarificationRequests: clarificationRequests.filter(
            (req: unknown): req is string => typeof req === 'string' && req.trim() !== ''
          ),
          fullPromptUsed: promptForTextAPI
        };
      }
    }

    console.warn("Text co-iteration API returned JSON with unexpected structure:", parsedData);
    return fallbackResult("Text co-iteration AI returned unexpected data structure. Using previous text.");
  } catch (error: any) {
    console.error('Error in refineTextDescriptionForPatentDrawing:', error);
    return fallbackResult(`Text co-iteration API call failed: ${error.message || "Unknown error"}. Using previous text.`);
  }
};
/**
 * Asks the text model to critique the last image prompt and return an
 * improved one, together with any clarification requests or warnings.
 *
 * API and parsing problems never reject: the function resolves with the
 * unchanged `lastUsedImagePrompt` and the problem reported as the single entry
 * of `clarificationRequestsOrWarnings`. The only throw is for a missing client.
 *
 * Template placeholders are filled in one pass with a replacer function, so
 * `$` sequences in the inserted text (including the previous, AI-written
 * prompt) are inserted literally and placeholder-like tokens inside inserted
 * text are not re-expanded.
 *
 * @param lastUsedImagePrompt Prompt used for the previous image generation.
 * @param originalUserDescription First user input, for overall context and
 *        figure/label extraction.
 * @param primaryFigureNumFromExtraction Primary figure identifier to keep the
 *        critique focused, or `null` to refer to "the main described figure".
 * @returns Refined prompt (or the previous prompt on failure), warnings, and
 *          the exact critique prompt sent to the model.
 * @throws Error when the Gemini client was not initialised.
 */
export const refineInternalImageGenInstructions = async (
  lastUsedImagePrompt: string,
  originalUserDescription: string,
  primaryFigureNumFromExtraction: string | null
): Promise<InternalRefinementOutput> => {
  if (!ai) throw new Error("Gemini API client not initialized.");

  const { instructionsText: figureAndLabelContext } = extractFigureLabelInstructions(originalUserDescription);
  const placeholderValues: Record<string, string> = {
    originalUserDescription: originalUserDescription,
    figureAndLabelContextBlock: figureAndLabelContext,
    lastUsedImagePrompt: lastUsedImagePrompt,
    primaryFigureIdentifierIfAny: primaryFigureNumFromExtraction
      ? `Figure ${primaryFigureNumFromExtraction}`
      : 'the main described figure',
  };
  const promptForCritiqueAPI = PROMPT_STYLES.INTERNAL_PROMPT_CRITIQUE_AND_REFINE.replace(
    /\{(originalUserDescription|figureAndLabelContextBlock|lastUsedImagePrompt|primaryFigureIdentifierIfAny)\}/g,
    (placeholder: string, key: string) => placeholderValues[key] ?? placeholder
  );

  const fallbackResult = (errorMsg: string): InternalRefinementOutput => ({
    refinedImagePrompt: lastUsedImagePrompt, // Fall back to the last used prompt.
    clarificationRequestsOrWarnings: [errorMsg],
    fullPromptUsedForCritique: promptForCritiqueAPI
  });

  try {
    console.log("Internally refining image generation prompt...");
    const response: GenerateContentResponse = await ai.models.generateContent({
      model: GEMINI_TEXT_MODEL,
      contents: promptForCritiqueAPI,
      config: { responseMimeType: "application/json" }
    });

    // `text` can be absent on an empty response; treat that as an empty
    // payload so it is reported as malformed JSON, not as an API call failure.
    let jsonStr = (response.text ?? '').trim();
    // Unwrap a Markdown code fence if the model wrapped its JSON in one.
    const fenceMatch = jsonStr.match(/^```(\w*)?\s*\n?(.*?)\n?\s*```$/s);
    if (fenceMatch && fenceMatch[2]) jsonStr = fenceMatch[2].trim();

    let parsedData: unknown;
    try {
      parsedData = JSON.parse(jsonStr);
    } catch (e) {
      console.error("Failed to parse JSON from internal prompt refinement API:", e, "\nRaw response:", jsonStr);
      return fallbackResult("Internal AI critique returned non-JSON or malformed JSON. Using previous image prompt.");
    }

    // Valid JSON can still be `null` or a primitive; only objects are inspected.
    if (parsedData !== null && typeof parsedData === 'object') {
      const { refinedImagePrompt, clarificationRequestsOrWarnings } =
        parsedData as { refinedImagePrompt?: unknown; clarificationRequestsOrWarnings?: unknown };
      if (typeof refinedImagePrompt === 'string' && Array.isArray(clarificationRequestsOrWarnings)) {
        return {
          // An empty refinement keeps the previous prompt.
          refinedImagePrompt: refinedImagePrompt.trim() || lastUsedImagePrompt,
          clarificationRequestsOrWarnings: clarificationRequestsOrWarnings.filter(
            (req: unknown): req is string => typeof req === 'string' && req.trim() !== ''
          ),
          fullPromptUsedForCritique: promptForCritiqueAPI
        };
      }
    }

    console.warn("Internal prompt refinement API returned JSON with unexpected structure:", parsedData);
    return fallbackResult("Internal AI critique returned unexpected data structure. Using previous image prompt.");
  } catch (error: any) {
    console.error('Error in refineInternalImageGenInstructions:', error);
    return fallbackResult(`Internal AI critique API call failed: ${error.message || "Unknown error"}. Using previous image prompt.`);
  }
};