# M-AzurLaneAutoScript/module/ocr/api_ocr.py

import sys
sys.path.append(r'C:/Users/W1NDe/Documents/GitHub/M-AzurLaneAutoScript')
import requests
from module.log_res.log_res import logger
import base64,requests,cv2,datetime, hashlib, hmac, json
from urllib.parse import quote, urlencode


class BaiduOcr:
    """
    Client for the Baidu OCR REST API.

    An OAuth access token is requested once, when the instance is created,
    and reused by every request_ocr() call.
    """

    # Seconds to wait for Baidu before giving up, so that a stalled
    # connection cannot block the caller forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, config, api_key=None, secret_key=None):
        """
        Args:
            config: Configuration object. DropRecord_BaiduAPIKey and
                DropRecord_BaiduAPISecret are read from it when the explicit
                arguments are empty. May be None.
            api_key (str): Baidu API key, takes precedence over the config.
            secret_key (str): Baidu secret key, takes precedence over the config.
        """
        self.name = "baidu"
        # getattr() keeps construction working when config is None or lacks
        # the attribute; a missing credential is reported as a missing token.
        self.api_key = api_key or getattr(config, 'DropRecord_BaiduAPIKey', None)
        self.secret_key = secret_key or getattr(config, 'DropRecord_BaiduAPISecret', None)
        self.access_token = self._get_access_token(self.api_key, self.secret_key)

    def _get_access_token(self, client_id="xxxx", client_secret="yyyy"):
        """
        Get Baidu OCR API access token

        Args:
            client_id (str): Baidu API key.
            client_secret (str): Baidu secret key.

        Returns:
            str: access_token, or None if it could not be obtained.
        """
        if not client_id or not client_secret:
            logger.warning('Baidu OCR API key or secret is not configured')
            return None

        url = "https://aip.baidubce.com/oauth/2.0/token"
        # Passed as params so that requests percent-encodes the credentials
        # instead of them being pasted raw into the URL.
        params = {
            'grant_type': 'client_credentials',
            'client_id': client_id,
            'client_secret': client_secret,
        }
        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json'
        }

        try:
            response = requests.post(url, params=params, headers=headers,
                                     timeout=self.REQUEST_TIMEOUT)
            result = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # Only the exception type is logged: requests' error messages can
            # embed the full URL, which contains the secret key.
            logger.warning(f'Failed to get Baidu OCR access token: {type(e).__name__}')
            return None

        if isinstance(result, dict) and 'access_token' in result:
            return result['access_token']
        logger.warning('Failed to get Baidu OCR access token')
        return None

    def request_ocr(self, image, model="general_basic"):
        """
        Send an image to a Baidu OCR endpoint.

        Args:
            image: Image accepted by cv2.imencode (presumably a numpy array
                as returned by cv2.imread).
            model (str): Endpoint name appended to /rest/2.0/ocr/v1/.

        Returns:
            The decoded JSON response, or False if there is no access token,
            the image cannot be encoded, or the request fails.
        """
        # Check the token first so no encoding work is done for a request
        # that cannot be sent.
        access_token = self.access_token
        if not access_token:
            logger.warning('Failed to get access token, cannot request Baidu OCR API')
            return False

        # Convert image to base64
        success, buffer = cv2.imencode('.png', image)
        if not success:
            logger.warning('Failed to encode image as PNG')
            return False
        img_base64 = base64.b64encode(buffer).decode('utf-8')

        # Call Baidu OCR API
        request_url = f"https://aip.baidubce.com/rest/2.0/ocr/v1/{model}"
        headers = {'content-type': 'application/x-www-form-urlencoded'}
        try:
            response = requests.post(
                request_url,
                params={"access_token": access_token},
                data={"image": img_base64},
                headers=headers,
                timeout=self.REQUEST_TIMEOUT,
            )
        except requests.exceptions.RequestException as e:
            # Type only, because the URL in the message carries the token.
            logger.warning(f'Failed to call Baidu OCR API: {type(e).__name__}')
            return False

        if not response.ok:
            logger.warning('Failed to call Baidu OCR API')
            return False

        try:
            return response.json()
        except ValueError:
            logger.warning('Failed to call Baidu OCR API')
            return False

class VolcOcr:
    """
    Provides an interface to the Volcengine OCR (Optical Character Recognition) service.

    This class handles the HMAC-SHA256 signing process required for authenticating
    requests to the Volcengine visual services API (cv).
    """

    # Seconds to wait for Volcengine before giving up, so that a stalled
    # connection cannot block the caller forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, config, api_key=None, secret_key=None):
        """
        Initializes the VolcOcr client.

        :param config: A configuration object, expected to have DropRecord_VolcAPIKey and DropRecord_VolcAPISecret attributes. May be None.
        :param api_key: Your Volcengine Access Key ID. Overrides the value from the config.
        :param secret_key: Your Volcengine Secret Access Key. Overrides the value from the config.
        """
        self.name = "volc"
        # Credentials from config or direct parameters
        self.ak = api_key or getattr(config, 'DropRecord_VolcAPIKey', None)
        self.sk = secret_key or getattr(config, 'DropRecord_VolcAPISecret', None)

        # Static parameters for the Volcengine CV OCRNormal service
        self.service = "cv"
        self.version = "2020-08-26"
        self.region = "cn-north-1"
        self.host = "visual.volcengineapi.com"
        self.content_type = "application/x-www-form-urlencoded"
        self.action = "OCRNormal"
        self.method = "POST"

    # Volcengine signing helper functions
    @staticmethod
    def hmac_sha256(key: bytes, content: str):
        """
        :param key: The HMAC key.
        :param content: The message; encoded as UTF-8 before hashing.
        :return: The raw HMAC-SHA256 digest as bytes.
        """
        return hmac.new(key, content.encode("utf-8"), hashlib.sha256).digest()

    @staticmethod
    def hash_sha256(content: str):
        """
        :param content: The text to hash; encoded as UTF-8 before hashing.
        :return: The SHA-256 digest as a lowercase hex string.
        """
        return hashlib.sha256(content.encode("utf-8")).hexdigest()

    @staticmethod
    def _quote_component(value):
        """
        Percent-encodes one query key or value, leaving only "-_.~" unescaped.

        :param value: A str or bytes is encoded as is; anything else is converted with str() first.
        :return: The encoded string.
        """
        if not isinstance(value, (str, bytes)):
            # quote() only accepts str/bytes; numbers and booleans would raise TypeError.
            value = str(value)
        return quote(value, safe="-_.~")

    @staticmethod
    def norm_query(params):
        """
        Builds the canonical query string used in the signature.

        Keys are emitted in sorted order; a list value produces one
        "key=value" pair per element.

        :param params: A dictionary of query parameters. Values may be scalars or lists of scalars.
        :return: The "&"-joined, percent-encoded query string ("" for an empty dictionary).
        """
        pairs = []
        for key in sorted(params):
            value = params[key]
            items = value if isinstance(value, list) else [value]
            encoded_key = VolcOcr._quote_component(key)
            for item in items:
                pairs.append(encoded_key + "=" + VolcOcr._quote_component(item))
        return "&".join(pairs).replace("+", "%20")

    def request_ocr(self, image=None, image_url=None, image_base64=None,
                         approximate_pixel=None, mode=None,
                         filter_thresh=None, half_to_full=None, model=None):
        """
        Runs OCR on one image and returns the formatted result.

        The image source is chosen in this order: image_base64, image, image_url.

        :param image: An image accepted by cv2.imencode; it is JPEG-encoded and sent as base64.
        :param image_url: URL of the image.
        :param image_base64: Already base64-encoded image data.
        :param approximate_pixel: Optional; sent as a string.
        :param mode: Optional; sent unchanged.
        :param filter_thresh: Optional; sent as a string.
        :param half_to_full: Optional; sent as a lowercased string (e.g. "true").
        :param model: Not used by this method; presumably accepted so the call signature matches BaiduOcr.request_ocr.
        :return: The result of _format_response(), or None if the image could not be encoded or the request failed.
        :raises ValueError: If none of image, image_url and image_base64 is given.
        """
        if image is None and image_url is None and image_base64 is None:
            logger.error("Either 'image', 'image_url', or 'image_base64' must be provided for Volcengine OCR.")
            raise ValueError("Either 'image', 'image_url', or 'image_base64' must be provided.")

        body_params = {}
        if image_base64:
            body_params['image_base64'] = image_base64
        elif image is not None:
            if image_url:
                logger.warning("Both 'image' and 'image_url' provided. 'image' will be used.")

            # Per documentation, JPG format is recommended for better compatibility.
            success, buffer = cv2.imencode('.jpg', image)
            if not success:
                logger.error("Failed to encode image as JPEG.")
                return None
            body_params['image_base64'] = base64.b64encode(buffer).decode('utf-8')
        elif image_url:
            body_params['image_url'] = image_url

        # Add optional parameters if they are provided, using snake_case as per API documentation.
        if approximate_pixel is not None:
            body_params['approximate_pixel'] = str(approximate_pixel)
        if mode is not None:
            body_params['mode'] = mode
        if filter_thresh is not None:
            body_params['filter_thresh'] = str(filter_thresh)
        if half_to_full is not None:
            body_params['half_to_full'] = str(half_to_full).lower()

        raw_response = self._signed_request(query={}, body_params=body_params)
        return self._format_response(raw_response)

    def _format_response(self, response):
        """
        Formats the raw Volcengine OCR response into a standardized structure.

        :param response: The raw JSON dictionary from the Volcengine API.
        :return: A dictionary with a single 'words_result' key, or the original
                 response if formatting fails or the response was an error.
        """
        if not response or 'data' not in response or not response.get('data'):
            logger.warning("Volcengine OCR response is invalid or contains no data; returning raw response.")
            return response

        try:
            data = response['data']
            line_texts = data.get('line_texts', [])
            # The 'line_rects' field contains the bounding box for each text line.
            # Each entry is read as a dictionary with 'x', 'y', 'width', 'height'.
            line_rects = data.get('line_rects', [])

            if len(line_texts) != len(line_rects):
                # zip() below stops at the shorter list, so the surplus entries are dropped.
                logger.warning("Mismatch between number of text lines and bounding boxes in Volcengine response.")

            words_result = [
                {
                    'words': text,
                    'location': {
                        'top': rect.get('y', 0),
                        'left': rect.get('x', 0),
                        'width': rect.get('width', 0),
                        'height': rect.get('height', 0)
                    }
                }
                for text, rect in zip(line_texts, line_rects)
            ]
            return {'words_result': words_result}
        except (TypeError, KeyError, AttributeError) as e:
            # AttributeError covers a 'data' or rect entry that is not a dictionary.
            logger.error(f"Failed to parse Volcengine OCR response data: {e}")
            return response # Return original response if parsing fails

    def _signed_request(self, query, body_params):
        """
        Orchestrates the signing and sending of a request to the Volcengine API.

        :param query: A dictionary of query parameters.
        :param body_params: A dictionary of body parameters (pre-urlencoding).
        :return: The JSON response from the API as a dictionary, or None on failure.
        """
        if not self.ak or not self.sk:
            logger.error("Volcengine OCR AK or SK not configured.")
            return None

        # The body must be url-encoded before signing.
        body = urlencode(body_params)

        request_param = {
            "body": body,
            "host": self.host,
            "path": "/",
            "method": self.method,
            "content_type": self.content_type,
            # Timezone-aware replacement for the deprecated utcnow(); the
            # strftime() output used for signing is the same.
            "date": datetime.datetime.now(datetime.timezone.utc),
            "query": {"Action": self.action, "Version": self.version, **query},
        }

        # Generate all necessary headers, including the signature.
        headers = self._get_signed_headers(request_param)

        try:
            # Send the authenticated request.
            r = requests.request(
                method=self.method,
                url=f"https://{self.host}/",
                headers=headers,
                params=request_param["query"],
                data=body.encode("utf-8"),
                timeout=self.REQUEST_TIMEOUT,
            )
            r.raise_for_status()
            return r.json()
        except requests.exceptions.RequestException as e:
            logger.error(f"Request to Volcengine failed: {e}")
            error_response = getattr(e, 'response', None)
            if error_response is not None:
                logger.error("--- VOLCENGINE ERROR RESPONSE ---")
                logger.error(f"STATUS CODE: {error_response.status_code}")
                try:
                    error_details = error_response.json()
                    logger.error(f"RESPONSE JSON:\n{json.dumps(error_details, indent=2)}")
                except ValueError:
                    # ValueError is the common base of every JSON decode error
                    # that Response.json() can raise.
                    logger.error(f"RESPONSE BODY:\n{error_response.text}")
                logger.error("--- END OF RESPONSE ---")
            return None
        except ValueError as e:
            # A successful status with a body that is not JSON.
            logger.error(f"Request to Volcengine failed: {e}")
            return None

    def _get_signed_headers(self, request_param):
        """
        Calculates the HMAC-SHA256 signature and returns the complete headers for the request.

        The calculation has four steps: canonical request, string to sign,
        derived signing key and signature, Authorization header.

        :param request_param: A dictionary with the keys "body", "host", "path", "method", "content_type", "date" (a datetime) and "query".
        :return: A dictionary of headers ready to be sent.
        """
        # Step 1: Create a CanonicalRequest.
        # This is a standardized representation of the request.
        x_date = request_param["date"].strftime("%Y%m%dT%H%M%SZ")
        x_content_sha256 = self.hash_sha256(request_param["body"])
        signed_headers_str = ";".join(["content-type", "host", "x-content-sha256", "x-date"])

        canonical_headers = "\n".join([
            "content-type:" + request_param["content_type"],
            "host:" + request_param["host"],
            "x-content-sha256:" + x_content_sha256,
            "x-date:" + x_date,
        ])
        canonical_request = "\n".join([
            request_param["method"].upper(),
            request_param["path"],
            self.norm_query(request_param["query"]),
            canonical_headers,
            "",  # blank line separating the headers from the signed-header list
            signed_headers_str,
            x_content_sha256,
        ])

        # Step 2: Create the StringToSign.
        # This string combines metadata about the request and the hashed CanonicalRequest.
        short_x_date = x_date[:8]
        hashed_canonical_request = self.hash_sha256(canonical_request)
        credential_scope = f"{short_x_date}/{self.region}/{self.service}/request"
        string_to_sign = "\n".join(["HMAC-SHA256", x_date, credential_scope, hashed_canonical_request])

        # Step 3: Calculate the Signature.
        # The signature is derived from a series of HMAC-SHA256 hashes.
        k_date = self.hmac_sha256(self.sk.encode("utf-8"), short_x_date)
        k_region = self.hmac_sha256(k_date, self.region)
        k_service = self.hmac_sha256(k_region, self.service)
        k_signing = self.hmac_sha256(k_service, "request")
        signature = self.hmac_sha256(k_signing, string_to_sign).hex()

        # Step 4: Assemble the Authorization header and other required headers.
        authorization = (
            f"HMAC-SHA256 Credential={self.ak}/{credential_scope}, "
            f"SignedHeaders={signed_headers_str}, Signature={signature}"
        )

        return {
            "Authorization": authorization,
            "Content-Type": request_param["content_type"],
            "Host": request_param["host"],
            "X-Content-Sha256": x_content_sha256,
            "X-Date": x_date,
        }


if __name__ == "__main__":
    # Manual smoke test: run one image through both OCR back ends and print
    # what each returns. Fill in real credentials below before running.
    import os

    volc_ocr = VolcOcr(None,"",
                  "==")
    baidu_ocr = BaiduOcr(None,"",
                  "")

    # The image is taken from the first command-line argument; without one,
    # test.png next to this file is used, so the script does not depend on
    # one machine's directory layout.
    if len(sys.argv) > 1:
        image_path = sys.argv[1]
    else:
        image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test.png")

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returned None instead of an image.
        print(f"Failed to read image from path: {image_path}")
    else:
        result = volc_ocr.request_ocr(image=image)
        result2 = baidu_ocr.request_ocr(image=image,model="general")
        print(result)
        print(result2)