import { GoogleGenAI, GenerateContentResponse, GenerateImagesResponse } from "@google/genai";
import { GEMINI_IMAGE_MODEL, GEMINI_TEXT_MODEL, PROMPT_STYLES } from '../constants';
import type { TextRefinementOutput, InternalRefinementOutput } from "../types";

/** Selects which prompt template family is used when building an image prompt. */
export enum PatentDrawingStyle {
  ITERATIVE_INITIAL,
  ITERATIVE_AUTO_REFINE,
}

/** Result of scanning a free-text description for figure numbers and labels. */
interface ExtractedFigureLabelInfo {
  /** Ready-to-embed sentence(s) summarising what was found. */
  instructionsText: string;
  /** Most frequently referenced figure identifier, or null when none was found. */
  primaryFigureNum: string | null;
  /** De-duplicated label/figure candidates in first-seen order. */
  rawExtractedElements: string[];
}

// Shared Gemini client. It stays null when construction fails (for example
// when API_KEY is missing), so importing this module never throws; each
// exported API function checks for null before use.
let ai: GoogleGenAI | null = null;
try {
  if (!process.env.API_KEY) {
    throw new Error("API_KEY environment variable is not set.");
  }
  ai = new GoogleGenAI({ apiKey: process.env.API_KEY });
} catch (e) {
  console.error("Failed to initialize GoogleGenAI:", e);
}

/** Bracketed runs of two or more digits, e.g. "[0012]". */
const BRACKETED_NUMBER_PATTERN = /\[\d{2,}\]/g;

/**
 * A number followed by a unit. Stripped before label extraction so that
 * quantities are not mistaken for reference numerals.
 * NOTE(review): the match is case-insensitive and includes single-letter
 * units, so a numeral such as "2A" or "10c" is removed as well — confirm
 * whether that is acceptable for the descriptions this receives.
 */
const MEASUREMENT_PATTERN = /\b\d+(\.\d+)?\s*(?:K|kHz|MHz|GHz|μs|ms|ns|microseconds|milliseconds|seconds|min|hr|C|F|Kelvin|Pa|kPa|MPa|bar|psi|N|kN|V|mV|kV|A|mA|μA|W|mW|kW|Hz|Ω|kΩ|MΩ|dB|lm|cd|mol|g|kg|mg|μg|L|mL|mm|cm|m|km)\b/gi;

/** "figure" / "fig" followed by an identifier; group 1 is the identifier. */
const FIGURE_REFERENCE_PATTERN = /\b(?:figure|fig)\s*\.?\s*([\w\d]+(?:[a-zA-Z](?![a-zA-Z]))?)\b/gi;

/**
 * Candidate patterns, applied in this order. Each has exactly one capture
 * group holding the label text. `matchAll` works on a clone of the regex, so
 * sharing these global-flag instances between calls is safe.
 */
const LABEL_PATTERNS: readonly RegExp[] = [
  FIGURE_REFERENCE_PATTERN,
  // Keyword-introduced labels: "item 12", "ref A-3".
  /\b(?:label|item|part|element|ref|reference|indicator|designator)\s+([a-zA-Z0-9][\w.-]*[a-zA-Z0-9]?)\b/gi,
  // Parenthesised labels following a phrase: "handle (12a)".
  /\b[a-zA-Z][\w\s-]*[a-zA-Z0-9]\s*\(([a-zA-Z0-9][\w.-]*[a-zA-Z0-9]?)\)/gi,
  // Bare numerals such as "A-12", "10", "12b".
  /\b([A-Z]+-?\d+[a-zA-Z]?|\d+[a-zA-Z]?-?[A-Z]?)(?![a-zA-Z0-9-])\b/gi,
];

/** True when `text` begins with "fig" (which also covers "figure"), ignoring case. */
const startsWithFigureWord = (text: string): boolean => text.toLowerCase().startsWith("fig");

/** Applies the noise filters that decide whether a captured candidate is kept. */
const isPlausibleLabel = (candidate: string, fullMatch: string): boolean => {
  if (candidate.length < 1) return false;
  // A single character must be alphanumeric.
  if (candidate.length === 1 && !/^[A-Za-z0-9]$/.test(candidate)) return false;
  // Very long captures are dropped unless they start with a figure word.
  if (candidate.length > 30 && !startsWithFigureWord(candidate)) return false;
  // Pure numbers above 999 are kept only when introduced by a figure word.
  if (/^\d+$/.test(candidate) && parseInt(candidate, 10) > 999 && !startsWithFigureWord(fullMatch)) return false;
  // Capitalised plain words are kept only when the match mentions "fig" or "label".
  const matchedText = fullMatch.toLowerCase();
  if (/^[A-Z][a-z]+$/.test(candidate) && candidate.length > 2 && !matchedText.includes("fig") && !matchedText.includes("label")) return false;
  return true;
};

/**
 * Returns the figure identifier referenced most often in `text`, or null.
 * Ties go to the identifier that reached the top count first. Identifiers
 * without a digit are ignored so that "figures" does not yield "s".
 */
const findPrimaryFigureNumber = (text: string): string | null => {
  const counts = new Map<string, number>();
  let primary: string | null = null;
  let primaryCount = 0;
  for (const match of text.matchAll(FIGURE_REFERENCE_PATTERN)) {
    const figNum = match[1];
    if (!figNum || !/\d/.test(figNum)) continue;
    const count = (counts.get(figNum) ?? 0) + 1;
    counts.set(figNum, count);
    if (count > primaryCount) {
      primary = figNum;
      primaryCount = count;
    }
  }
  return primary;
};

/**
 * Scans a free-text description for figure numbers and reference labels.
 *
 * Bracketed digit runs and measurements are removed first, then every label
 * pattern is applied and its captures are filtered and de-duplicated.
 *
 * The primary figure number is derived directly from the figure references in
 * the cleaned text. Previously it was derived from the stored captures, which
 * hold only the identifier ("1", not "Figure 1"), so the figure-word check
 * almost never matched and the result was null for ordinary input.
 *
 * @param userInput Description text supplied by the user.
 * @returns The summary sentence(s), the primary figure identifier (or null)
 *          and the raw candidate list.
 */
export const extractFigureLabelInstructions = (userInput: string): ExtractedFigureLabelInfo => {
  const cleanedInput = userInput
    .replace(BRACKETED_NUMBER_PATTERN, '')
    .replace(MEASUREMENT_PATTERN, '');

  // A Set keeps first-seen order while removing duplicates across patterns.
  const instructionsSet = new Set<string>();
  for (const pattern of LABEL_PATTERNS) {
    for (const match of cleanedInput.matchAll(pattern)) {
      const candidate = (match[1] || match[0]).trim();
      if (isPlausibleLabel(candidate, match[0])) {
        instructionsSet.add(candidate);
      }
    }
  }
  const uniqueInstructions = Array.from(instructionsSet);

  const primaryFigureNum = uniqueInstructions.length > 0 ? findPrimaryFigureNumber(cleanedInput) : null;

  let fullInstructionsText = uniqueInstructions.length === 0
    ? "No specific figure numbers or explicit labels were parsed from the description. AI should ensure no text is added to the drawing unless a figure number is clearly implied by the main description."
    : `Potential Figure/Label Information (AI must discern actual labels/figures to render): "${uniqueInstructions.join("; ")}".`;
  if (primaryFigureNum) {
    fullInstructionsText += ` The primary figure number identified appears to be '${primaryFigureNum}'. AI should focus on rendering this figure number if applicable.`;
  }

  return {
    instructionsText: fullInstructionsText,
    primaryFigureNum,
    rawExtractedElements: uniqueInstructions,
  };
};

// This function now primarily serves to construct the INITIAL image prompt.
// Subsequent prompts are refined by refineInternalImageGenInstructions.
/**
 * Builds an image-generation prompt by filling the template selected by
 * `style` with the subject description, the figure/label context extracted
 * from the original user description, and the iteration counters.
 *
 * @param subjectDescriptionForThisIteration Text describing the subject (original or co-iterated).
 * @param originalUserFullDescription The very first user input; used only for figure/label extraction.
 * @param style Template selector; unrecognised values fall back to the initial template.
 * @param iterationContext Current and maximum iteration numbers inserted into the template.
 * @returns The filled-in prompt text.
 */
export const constructInitialImagePrompt = (
  subjectDescriptionForThisIteration: string, // This could be original or co-iterated text
  originalUserFullDescription: string, // Always the very first user input
  style: PatentDrawingStyle,
  iterationContext: { currentIteration: number, maxIterations: number }
): string => {
  const { instructionsText: figureAndLabelContext, primaryFigureNum } =
    extractFigureLabelInstructions(originalUserFullDescription);

  let basePromptTemplate: string;
  switch (style) {
    case PatentDrawingStyle.ITERATIVE_AUTO_REFINE:
      // Should ideally not be called here if prompts are refined internally
      basePromptTemplate = PROMPT_STYLES.ITERATIVE_AUTO_REFINE_BASE;
      break;
    case PatentDrawingStyle.ITERATIVE_INITIAL:
    default:
      basePromptTemplate = PROMPT_STYLES.ITERATIVE_INITIAL_BASE;
  }

  return fillTemplate(basePromptTemplate, {
    subjectDescription: subjectDescriptionForThisIteration,
    figureAndLabelContextBlock: figureAndLabelContext,
    currentIteration: iterationContext.currentIteration.toString(),
    maxIterations: iterationContext.maxIterations.toString(),
    primaryFigureIdentifierIfAny: describePrimaryFigure(primaryFigureNum),
  });
};

/**
 * Replaces every `{name}` placeholder that has an entry in `values`, in a
 * single pass over the template.
 *
 * A replacer function is used on purpose: when a plain string is passed as
 * the replacement, `String.prototype.replace` interprets `$&`, `$1`, `$$` and
 * similar sequences, which would corrupt user text containing a dollar sign.
 * The single pass also means inserted text is never re-scanned for
 * placeholders. Placeholders without an entry are left untouched.
 */
function fillTemplate(template: string, values: Readonly<Record<string, string>>): string {
  return template.replace(/\{(\w+)\}/g, (placeholder: string, key: string) => {
    const value = Object.prototype.hasOwnProperty.call(values, key) ? values[key] : undefined;
    return value ?? placeholder;
  });
}

/** Phrase used in prompts to refer to the figure being drawn. */
function describePrimaryFigure(primaryFigureNum: string | null): string {
  return primaryFigureNum ? `Figure ${primaryFigureNum}` : 'the main described figure';
}

/** Narrowing helper: true for any non-null object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/** Returns the string `message` of a thrown value, or '' when it has none. */
function errorMessageOf(error: unknown): string {
  if (isRecord(error)) {
    const message = error['message'];
    if (typeof message === 'string') return message;
  }
  return '';
}

/** Trims model output and unwraps a surrounding ``` code fence when present. */
function extractJsonPayload(rawText: string): string {
  const trimmed = rawText.trim();
  const fenceMatch = trimmed.match(/^```(\w*)?\s*\n?(.*?)\n?\s*```$/s);
  return fenceMatch && fenceMatch[2] ? fenceMatch[2].trim() : trimmed;
}

/** Keeps only the entries that are strings with non-whitespace content. */
function keepNonEmptyStrings(items: readonly unknown[]): string[] {
  return items.filter((item): item is string => typeof item === 'string' && item.trim() !== '');
}

/**
 * Requests a single PNG from the image model for the given prompt.
 *
 * @param fullApiPromptForImage The fully constructed (and potentially refined) prompt.
 * @param currentIterationNum Iteration number, used for logging only.
 * @param maxIterations Total iterations, used for logging only.
 * @returns A base64 `data:` URL for the image together with the prompt that produced it.
 * @throws Error when the client is not initialised, when the response carries
 *         no image bytes, or when the API call fails. API-key and quota
 *         failures get dedicated messages.
 */
export const generatePatentDrawing = async (
  fullApiPromptForImage: string, // This is now the fully constructed (and potentially refined) prompt
  currentIterationNum: number,
  maxIterations: number
): Promise<{ imageUrl: string, fullPromptUsed: string }> => {
  if (!ai) {
    throw new Error("Gemini API client is not initialized. Check API_KEY setup.");
  }
  const client = ai;

  try {
    console.log(`Generating image, Iteration: ${currentIterationNum}/${maxIterations}`);
    // console.log("Full prompt for IMAGE API:", fullApiPromptForImage);

    const response: GenerateImagesResponse = await client.models.generateImages({
      model: GEMINI_IMAGE_MODEL,
      prompt: fullApiPromptForImage,
      config: {
        numberOfImages: 1,
        outputMimeType: 'image/png',
      },
    });

    const imageBytes = response.generatedImages?.[0]?.image?.imageBytes;
    if (!imageBytes) {
      console.error("API response missing image data:", response);
      throw new Error("No image data received from the API. The response might be empty or malformed.");
    }
    return { imageUrl: `data:image/png;base64,${imageBytes}`, fullPromptUsed: fullApiPromptForImage };
  } catch (error: unknown) {
    console.error('Error generating image with Gemini API:', error);
    const message = errorMessageOf(error);
    if (message.includes("API_KEY")) {
      throw new Error(`API Key error: ${message}. Please ensure your API_KEY is valid and has permissions for the Imagen API.`);
    }
    if (message.toLowerCase().includes("quota")) {
      throw new Error("API quota exceeded. Please check your Gemini API quota limits.");
    }
    const errorResponse = isRecord(error) ? error['response'] : undefined;
    if (isRecord(errorResponse) && errorResponse['data']) {
      console.error("Gemini API Error Response Data:", errorResponse['data']);
    }
    throw new Error(`Gemini API error during image generation: ${message || 'Unknown error'}`);
  }
};

/**
 * Asks the text model to refine the user's description and to list any
 * clarification requests.
 *
 * API or parsing failures do not reject: the current text is returned
 * unchanged with the failure reason as the only clarification request.
 *
 * @param currentTextDescription The description to refine.
 * @param originalUserDescription The original user input, used for figure/label context.
 * @param textIteration Iteration number inserted into the prompt.
 * @returns The refined text, clarification requests and the prompt that was sent.
 * @throws Error only when the Gemini client is not initialised.
 */
export const refineTextDescriptionForPatentDrawing = async (
  currentTextDescription: string,
  originalUserDescription: string, // For context of original figure/label intents
  textIteration: number
): Promise<TextRefinementOutput> => {
  if (!ai) throw new Error("Gemini API client not initialized.");
  const client = ai;

  const { instructionsText: figureAndLabelContext } = extractFigureLabelInstructions(originalUserDescription);
  const promptForTextAPI = fillTemplate(PROMPT_STYLES.TEXT_COITERATION_REFINE_PROMPT, {
    currentTextDescription,
    figureAndLabelContextBlock: figureAndLabelContext,
    textIteration: textIteration.toString(),
  });

  const fallbackResult = (errorMsg: string): TextRefinementOutput => ({
    refinedText: currentTextDescription,
    clarificationRequests: [errorMsg],
    fullPromptUsed: promptForTextAPI
  });

  try {
    console.log(`Refining user text description, input for Text Iteration: ${textIteration}`);
    // console.log("Full prompt for TEXT CO-ITERATION API:", promptForTextAPI);

    const response: GenerateContentResponse = await client.models.generateContent({
      model: GEMINI_TEXT_MODEL,
      contents: promptForTextAPI,
      config: { responseMimeType: "application/json" }
    });

    // `text` may be undefined when the model returned no text; treat that
    // as an empty (and therefore unparsable) payload.
    const jsonStr = extractJsonPayload(response.text ?? '');

    let parsedData: unknown;
    try {
      parsedData = JSON.parse(jsonStr);
    } catch (e) {
      console.error("Failed to parse JSON from text co-iteration API:", e, "\nRaw response:", jsonStr);
      return fallbackResult("Text co-iteration AI returned non-JSON or malformed JSON. Using previous text.");
    }

    const refinedDescription = isRecord(parsedData) ? parsedData['refinedDescription'] : undefined;
    const clarificationRequests = isRecord(parsedData) ? parsedData['clarificationRequests'] : undefined;
    if (typeof refinedDescription === 'string' && Array.isArray(clarificationRequests)) {
      return {
        // An empty refinement keeps the current text.
        refinedText: refinedDescription.trim() || currentTextDescription,
        clarificationRequests: keepNonEmptyStrings(clarificationRequests),
        fullPromptUsed: promptForTextAPI
      };
    }
    console.warn("Text co-iteration API returned JSON with unexpected structure:", parsedData);
    return fallbackResult("Text co-iteration AI returned unexpected data structure. Using previous text.");
  } catch (error: unknown) {
    console.error('Error in refineTextDescriptionForPatentDrawing:', error);
    return fallbackResult(`Text co-iteration API call failed: ${errorMessageOf(error) || "Unknown error"}. Using previous text.`);
  }
};

/**
 * Asks the text model to critique the last image prompt and produce a
 * refined one.
 *
 * API or parsing failures do not reject: the last used prompt is returned
 * unchanged with the failure reason as the only warning.
 *
 * @param lastUsedImagePrompt The prompt used for the previous image.
 * @param originalUserDescription The original user input, for overall context.
 * @param primaryFigureNumFromExtraction Primary figure identifier, or null when none is known.
 * @returns The refined prompt, any requests/warnings and the critique prompt that was sent.
 * @throws Error only when the Gemini client is not initialised.
 */
export const refineInternalImageGenInstructions = async (
  lastUsedImagePrompt: string,
  originalUserDescription: string, // For overall context
  primaryFigureNumFromExtraction: string | null // To maintain focus
): Promise<InternalRefinementOutput> => {
  if (!ai) throw new Error("Gemini API client not initialized.");
  const client = ai;

  const { instructionsText: figureAndLabelContext } = extractFigureLabelInstructions(originalUserDescription);
  const promptForCritiqueAPI = fillTemplate(PROMPT_STYLES.INTERNAL_PROMPT_CRITIQUE_AND_REFINE, {
    originalUserDescription,
    figureAndLabelContextBlock: figureAndLabelContext,
    lastUsedImagePrompt,
    primaryFigureIdentifierIfAny: describePrimaryFigure(primaryFigureNumFromExtraction),
  });

  const fallbackResult = (errorMsg: string): InternalRefinementOutput => ({
    refinedImagePrompt: lastUsedImagePrompt, // Fallback to the last used prompt
    clarificationRequestsOrWarnings: [errorMsg],
    fullPromptUsedForCritique: promptForCritiqueAPI
  });

  try {
    console.log("Internally refining image generation prompt...");
    // console.log("Full prompt for INTERNAL PROMPT CRITIQUE API:", promptForCritiqueAPI);

    const response: GenerateContentResponse = await client.models.generateContent({
      model: GEMINI_TEXT_MODEL,
      contents: promptForCritiqueAPI,
      config: { responseMimeType: "application/json" }
    });

    // `text` may be undefined when the model returned no text; treat that
    // as an empty (and therefore unparsable) payload.
    const jsonStr = extractJsonPayload(response.text ?? '');

    let parsedData: unknown;
    try {
      parsedData = JSON.parse(jsonStr);
    } catch (e) {
      console.error("Failed to parse JSON from internal prompt refinement API:", e, "\nRaw response:", jsonStr);
      return fallbackResult("Internal AI critique returned non-JSON or malformed JSON. Using previous image prompt.");
    }

    const refinedImagePrompt = isRecord(parsedData) ? parsedData['refinedImagePrompt'] : undefined;
    const requestsOrWarnings = isRecord(parsedData) ? parsedData['clarificationRequestsOrWarnings'] : undefined;
    if (typeof refinedImagePrompt === 'string' && Array.isArray(requestsOrWarnings)) {
      return {
        // An empty refinement keeps the last used prompt.
        refinedImagePrompt: refinedImagePrompt.trim() || lastUsedImagePrompt,
        clarificationRequestsOrWarnings: keepNonEmptyStrings(requestsOrWarnings),
        fullPromptUsedForCritique: promptForCritiqueAPI
      };
    }
    console.warn("Internal prompt refinement API returned JSON with unexpected structure:", parsedData);
    return fallbackResult("Internal AI critique returned unexpected data structure. Using previous image prompt.");
  } catch (error: unknown) {
    console.error('Error in refineInternalImageGenInstructions:', error);
    return fallbackResult(`Internal AI critique API call failed: ${errorMessageOf(error) || "Unknown error"}. Using previous image prompt.`);
  }
};