首次提交

2026-01-15 14:20:44 +08:00
commit af250800d6
10 changed files with 264 additions and 0 deletions
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,23 @@
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="9">
+            <item index="0" class="java.lang.String" itemvalue="sentence_transformers" />
+            <item index="1" class="java.lang.String" itemvalue="fast_langdetect" />
+            <item index="2" class="java.lang.String" itemvalue="jieba_fast" />
+            <item index="3" class="java.lang.String" itemvalue="rotary_embedding_torch" />
+            <item index="4" class="java.lang.String" itemvalue="ko_pron" />
+            <item index="5" class="java.lang.String" itemvalue="pyopenjtalk" />
+            <item index="6" class="java.lang.String" itemvalue="g2p_en" />
+            <item index="7" class="java.lang.String" itemvalue="huggingface_hub" />
+            <item index="8" class="java.lang.String" itemvalue="x_transformers" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.12 (python_script)" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (python_script)" project-jdk-type="Python SDK" />
+</project>
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/python_script.iml" filepath="$PROJECT_DIR$/.idea/python_script.iml" />
+    </modules>
+  </component>
+</project>
--- a/.idea/python_script.iml
+++ b/.idea/python_script.iml
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/.venv" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
--- a/根据文章内容生成标签/__pycache__/extract_doc_tag.cpython-312.pyc
+++ b/根据文章内容生成标签/__pycache__/extract_doc_tag.cpython-312.pyc
--- a/根据文章内容生成标签/extract_doc_tag.py
+++ b/根据文章内容生成标签/extract_doc_tag.py
@@ -0,0 +1,138 @@
+import requests
+import json
+import re
+import time  # 导入 time 模块
+
+OLLAMA_BASE_URL = "http://127.0.0.1:11434/api/generate"
+def extract_tags_with_ollama_from_content(model, article_content, max_tags=10, min_length=2, max_length=6):
+    """
+    Extract keyword tags from an article with a local Ollama language model.
+
+    The tag count and length limits are only passed to the model through the
+    prompt; the returned tags are de-duplicated but not filtered against them.
+
+    Args:
+        model (str): Name of the Ollama model to query.
+        article_content (str): Text of the article to analyse.
+        max_tags (int): Maximum number of tags requested from the model.
+        min_length (int): Minimum tag length requested from the model.
+        max_length (int): Maximum tag length requested from the model.
+
+    Returns:
+        dict: On success ``{'code': 0, 'message': 'success', 'data': {...}}``
+        where ``data`` holds ``model``, ``think`` (text found inside a
+        ``<think>`` block, or ``""``), ``tags`` (list of str) and ``consume``
+        (elapsed seconds). On failure ``{'code': <non-zero>, 'message': str}``.
+    """
+    # Nothing to analyse: report success with no tags and skip the request.
+    if not article_content or not article_content.strip():
+        return {
+            'code': 0,
+            'message': 'success',
+            'data': {
+                'model': model,
+                'think': '警告：输入的文章内容为空或仅包含空白字符。',
+                'tags': [],
+                'consume': 0.0
+            }
+        }
+
+    # Build the prompt; it is written in Chinese to suit qwen models.
+    prompt = f"""
+请严格按照以下要求，从提供的文章内容中提取关键词。
+
+文章内容:
+{article_content}
+
+要求:
+- 提取最多 {max_tags} 个最能概括文章主旨和核心概念的关键词。
+- 关键词必须来源于文章内容，准确反映文章主题。
+- 每个关键词的长度必须在 {min_length} 到 {max_length} 个字符之间。
+- 输出格式为：关键词1, 关键词2, 关键词3, ...
+- 只输出关键词列表，不要有任何其他解释或前缀。
+
+"""
+    # Start timing before the request so 'consume' covers the whole call.
+    start_time = time.perf_counter()
+    # Request body; "stream" is off and the reply is parsed as one JSON object.
+    payload = {
+        "model": model,
+        "prompt": prompt,
+        "system": "你是一个专门生成文章标签的助手，请你根据我给你的文章的内容总结并生成一系列的标签，格式可以参考[关键词1, 关键词2, 关键词3].你只需要给我生成这种形式的标签即可，其他分析内容无需输出.",
+        "stream": False,
+        "options": {
+            "top_p": 0.9,
+            "temperature": 0.1,  # low temperature keeps the output focused
+            "num_predict": 64000,  # upper bound on the number of predicted tokens
+        }
+    }
+
+    try:
+        # Send the request to the configured Ollama endpoint.
+        response = requests.post(OLLAMA_BASE_URL, json=payload)
+        if response.status_code != 200:
+            print(response.text)
+            # A Response object is not subscriptable; read the error text
+            # from the JSON body and fall back to the raw body if needed.
+            try:
+                error_detail = response.json()['error']
+            except (ValueError, KeyError, TypeError):
+                error_detail = response.text
+            return {
+                'code': 404,
+                'message': f"{error_detail}",
+            }
+
+        # Parse the JSON body of a successful reply.
+        result = response.json()
+        if "response" not in result:
+            print(result)
+            return {
+                'code': 500,
+                'message': "Error: Unexpected response format from Ollama",
+            }
+
+        llm_output = result["response"].strip()
+        think_match = re.search(r'<think>(.*?)</think>', llm_output, re.DOTALL)
+        ai_think = think_match.group(1).strip() if think_match else ""
+        # Drop the first <think>...</think> block so only the tag list remains.
+        clean_output = re.sub(r'<think>.*?</think>', '', llm_output, count=1, flags=re.DOTALL).strip()
+        # The system prompt shows a bracketed list, so strip one enclosing
+        # pair of brackets, then split on ASCII or full-width separators.
+        if clean_output.startswith('[') and clean_output.endswith(']'):
+            clean_output = clean_output[1:-1]
+        raw_tags = [tag.strip() for tag in re.split(r'[,，、]', clean_output) if tag.strip()]
+        # De-duplicate while preserving first-seen order.
+        unique_filtered_tags = list(dict.fromkeys(raw_tags))
+
+        elapsed_time = time.perf_counter() - start_time
+        return {
+            'code': 0,
+            'message': 'success',
+            'data': {
+                'model': model,
+                'think': ai_think,
+                'tags': unique_filtered_tags,
+                'consume': elapsed_time
+            }
+        }
+
+    except json.JSONDecodeError as e:
+        # Handled before RequestException: requests >= 2.27 raises a JSON
+        # error that is a subclass of both exception types.
+        return {
+            'code': 500,
+            'message': f"Error decoding JSON response from Ollama: {e}"
+        }
+    except requests.exceptions.RequestException as e:
+        # Failures raised by requests while calling the API.
+        return {
+            'code': 500,
+            'message': f"Error calling Ollama API: {e}",
+        }
+    except Exception as e:
+        # Last-resort boundary so callers always receive an error dict.
+        return {
+            'code': 500,
+            'message': f"An unexpected error occurred: {e}"
+        }
--- a/根据文章内容生成标签/main.py
+++ b/根据文章内容生成标签/main.py
@@ -0,0 +1,36 @@
+import extract_doc_tag as ex_doc_tag
+
+# Read the article from a file path relative to the working directory.
+file_path = 'test.txt'
+
+try:
+    with open(file_path, 'r', encoding='utf-8') as file:
+        my_article_content = file.read()
+except FileNotFoundError:
+    print(f"错误：找不到文件 '{file_path}'")
+    # SystemExit is raised directly; the exit() helper comes from the site module.
+    raise SystemExit(1)
+except Exception as e:
+    print(f"读取文件时发生错误: {e}")
+    raise SystemExit(1)
+
+model = "qwen3:1.7b"
+extracted_tags = ex_doc_tag.extract_tags_with_ollama_from_content(
+    model,
+    my_article_content,
+    max_tags=8,
+    min_length=2,
+    max_length=6
+)
+
+if extracted_tags['code'] == 0:
+    print('思考过程：')
+    print(extracted_tags['data']['think'])
+    print('=' * 60)
+    print('文章内容长度', len(my_article_content))
+    print('=' * 60)
+    print('文章标签：', extracted_tags['data']['tags'])
+    print('=' * 60)
+    print(f"总耗时: {extracted_tags['data']['consume']:.2f} 秒")
+else:
+    print(extracted_tags)
--- a/根据文章内容生成标签/test.txt
+++ b/根据文章内容生成标签/test.txt
@@ -0,0 +1,28 @@
+
+一位作家曾说过：“我想换一种方式，换一只手，也许是练好字的一个新契机。”由此可见，尝试换一只手做事，说不定能遇见全新的自己，将目光移开，可能会发现更广阔的天地，发现新的机遇。
+
+
+< 01 >要善于发现机会
+生活中，有人埋怨大展宏图的机会很少。
+殊不知，在你不以为然的事情中，恰恰藏着机会的钥匙。
+那些逆袭转运的人，会主动从危机中发现转机，在困境中寻觅出口，一旦发现契机，便会毫不犹豫抓住，打破困局、迎来重生。
+
+< 02 >要有把握机会的勇气
+很多时候，人生变好不在于能力的高下、学识的多少，而在于你是否具备打开新思路、把握新时机的勇气。
+很喜欢一句话：“决定我们成为什么样人的，不是我们的能力，而是我们的选择。”
+换个角度看世界，世界也会以更广阔的面貌回馈于你。
+
+< 03 >要为机会做出努力
+人生有种困境叫路径依赖。
+有时候，阻碍我们发展进步的，恰恰是那些不断被证明有效的经验。
+过分依赖路径，内在的智慧就难以发挥。没有行动，什么都不会发生。
+这个世界上，大部分人最缺的不是机遇，而是抓住机会的执行力。
+光想不做，只会让梦想落空，永远困于原地。
+唯有敲碎经验牢笼，才能在机遇一闪而过时快速把握、顺势而为。
+摒弃干扰因素，放下惯性洞察，不断评估和修正自己。
+不盲从、不武断地抓住破局点，做出明智判断，开启全新人生。
+“挡在你面前的，只有你自己。”
+人这一生，需要不断刷新自己、重塑自己。
+转变思维，拆掉内心的高墙，难题才能迎刃而解；换另一只手做事，方可不断成长、越来越强。
+
+